#include <cinttypes>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <pthread.h>
#include <cassert>
#include <chrono>
#include <iostream>

#include "globals.h"
#include "utils/graphIO.h"
#include "utils/debug.h"
#include "utils/diffAnalysis.h"

#include "representations/tradListGraphR.h"
#include "representations/metisR.h"
#include "representations/diffGraphR.h"

#include "coders/myVarintByteBasedCoder.h"
#include "coders/myVarintByteBasedNoDiffCoder.h"
#include "coders/myVarintByteBasedRecCoder.h"

#include "offsetStructures/offsetsBV_RRR.h"

using namespace std;

// ---- Command-line configuration, assigned in main() from argv[1..5] ----

// argv[1]: label written at the start of the timing / construction lines.
std::string GRAPH_S;
// argv[2]: path passed to GraphIO::loadGraph.
std::string GRAPH_FILE_T;
// argv[3]: path passed to GraphIO::readRecursivePartitioningFromFile.
std::string GRAPH_FILE_R;
// argv[4]: directory part of every result file path (dir + "/" + file + suffix).
std::string dir;
// argv[5]: file-name prefix of every result file path.
std::string file;
// Shared output stream; it is re-opened on different result files by main()
// and by thread 0 inside the measure_* routines.
ofstream f_cat;

// Number of timed queries each measure_* routine performs.
int MAX_ITERATIONS = 10000;
// Number of untimed warm-up queries each measure_* routine performs first.
int ITERATIONS_PER_WARMUP = 100;
// Factor used to convert elapsed seconds to microseconds in the output.
int MICRO_SECONDS = 1000000;
// Inclusive upper bound on the thread counts tried by the loop in main().
int MAX_THREADS = 26;

// First output of GraphIO::loadGraph; its n_ member bounds the random vertex
// ids generated in measure(), and it is passed to every DiffGraphR constructor.
TradListGraphR* rep_original_t = NULL;

// Second output of GraphIO::loadGraph; queried by the *_trad measurements.
MetisR* rep_original_m = NULL;
// Result of GraphIO::readRecursivePartitioningFromFile; passed to every
// DiffGraphR constructor in main().
SimpleRecGraphR* rep_bisected_r = NULL;

// Representations under test, all constructed in main():
//   DMd - DEGREE_HIGH_TO_LOW ordering, MyVarintByteBasedCoder
//   RB  - INORDER ordering,            MyVarintByteBasedCoder
//   BR  - NO_CHANGES ordering,         MyVarintByteBasedRecCoder (+ `hack` flag)
//   DMf - DEGREE_HIGH_TO_LOW ordering, MyVarintByteBasedNoDiffCoder
DiffGraphR<OffsetsBV_RRR, MyVarintByteBasedCoder>* DMd;
DiffGraphR<OffsetsBV_RRR, MyVarintByteBasedCoder>* RB;
DiffGraphR<OffsetsBV_RRR, MyVarintByteBasedRecCoder>* BR;
DiffGraphR<OffsetsBV_RRR, MyVarintByteBasedNoDiffCoder>* DMf;

// Barrier shared by all worker threads of one run; main() initialises it with
// the current thread count before spawning and destroys it after joining.
pthread_barrier_t thread_barrier;

// Arguments handed to one worker thread (see measure()). Kept as a plain
// aggregate because main() allocates it with malloc and measure() frees it.
struct thread_params_t {
  int thread_id;    // index of this worker within the current run (0-based)
  int num_threads;  // total number of workers in the current run
};
typedef struct thread_params_t thread_params;

// Extra flag forwarded as the last constructor argument when BR is built in
// main(). NOTE(review): its meaning is defined by DiffGraphR, not visible here.
bool hack = true;

// File-scope time-point slots, typed on std::chrono::system_clock.
// NOTE(review): std::chrono::high_resolution_clock is not guaranteed to be the
// same type as system_clock, so assigning high_resolution_clock::now() to these
// only compiles on standard libraries where the two clocks are aliases.
std::chrono::time_point<std::chrono::system_clock> s, e, start_time, end_time;


void measure_deg_DMd(int thread_id, int num_threads, v_id* rand_v) {
  pthread_barrier_wait(&thread_barrier);
  for (int i = 0; i < ITERATIONS_PER_WARMUP; i++) {
    v_id deg = DMd->getVertexDegree(rand_v[i]);
  }
  pthread_barrier_wait(&thread_barrier);
  for (int i = ITERATIONS_PER_WARMUP; i < ITERATIONS_PER_WARMUP + MAX_ITERATIONS; i++) {
    if (thread_id == 0) {
      f_cat.open(dir + "/" + file + "_DMd_deg", ios::app);
      start_time = std::chrono::high_resolution_clock::now();
    }
    v_id deg = DMd->getVertexDegree(rand_v[i]);
    if (thread_id == 0) {
      end_time = std::chrono::high_resolution_clock::now();
      std::chrono::duration<double> elapsed_seconds = end_time-start_time;
      f_cat << GRAPH_S << " deg [us]: " << elapsed_seconds.count()*MICRO_SECONDS << endl;
      f_cat.close();
    }
  } 
  
}

void measure_N_DMd(int thread_id, int num_threads, v_id* rand_v) {
  pthread_barrier_wait(&thread_barrier);
  for (int i = 0; i < ITERATIONS_PER_WARMUP; i++) {
    v_id* N = DMd->getVertexNeighbors(rand_v[i]);
  }
  pthread_barrier_wait(&thread_barrier);
  for (int i = ITERATIONS_PER_WARMUP; i < ITERATIONS_PER_WARMUP + MAX_ITERATIONS; i++) {
    if (thread_id == 0) {
      f_cat.open(dir + "/" + file + "_DMd_N", ios::app);
      start_time = std::chrono::high_resolution_clock::now();
    }
    v_id* N = DMd->getVertexNeighbors(rand_v[i]);
    if (thread_id == 0) {
      end_time = std::chrono::high_resolution_clock::now();
      std::chrono::duration<double> elapsed_seconds = end_time-start_time;
      f_cat << GRAPH_S << " neigh [us]: " << elapsed_seconds.count()*MICRO_SECONDS << endl;
      f_cat.close();
    }
  } 
  
}

void measure_deg_DMf(int thread_id, int num_threads, v_id* rand_v) {
  pthread_barrier_wait(&thread_barrier);
  for (int i = 0; i < ITERATIONS_PER_WARMUP; i++) {
    v_id deg = DMf->getVertexDegree(rand_v[i]);
  }
  pthread_barrier_wait(&thread_barrier);
  for (int i = ITERATIONS_PER_WARMUP; i < ITERATIONS_PER_WARMUP + MAX_ITERATIONS; i++) {
    if (thread_id == 0) {
      f_cat.open(dir + "/" + file + "_DMf_deg", ios::app);
      start_time = std::chrono::high_resolution_clock::now();
    }
    v_id deg = DMf->getVertexDegree(rand_v[i]);
    if (thread_id == 0) {
      end_time = std::chrono::high_resolution_clock::now();
      std::chrono::duration<double> elapsed_seconds = end_time-start_time;
      f_cat << GRAPH_S << " deg [us]: " << elapsed_seconds.count()*MICRO_SECONDS << endl;
      f_cat.close();
    }
  } 
  
}

void measure_N_DMf(int thread_id, int num_threads, v_id* rand_v) {
  pthread_barrier_wait(&thread_barrier);
  for (int i = 0; i < ITERATIONS_PER_WARMUP; i++) {
    v_id* N = DMf->getVertexNeighbors(rand_v[i]);
  }
  pthread_barrier_wait(&thread_barrier);
  for (int i = ITERATIONS_PER_WARMUP; i < ITERATIONS_PER_WARMUP + MAX_ITERATIONS; i++) {
    if (thread_id == 0) {
      f_cat.open(dir + "/" + file + "_DMf_N", ios::app);
      start_time = std::chrono::high_resolution_clock::now();
    }
    v_id* N = DMf->getVertexNeighbors(rand_v[i]);
    if (thread_id == 0) {
      end_time = std::chrono::high_resolution_clock::now();
      std::chrono::duration<double> elapsed_seconds = end_time-start_time;
      f_cat << GRAPH_S << " neigh [us]: " << elapsed_seconds.count()*MICRO_SECONDS << endl;
      f_cat.close();
    }
  } 
  
}

void measure_deg_RB(int thread_id, int num_threads, v_id* rand_v) {
  pthread_barrier_wait(&thread_barrier);
  for (int i = 0; i < ITERATIONS_PER_WARMUP; i++) {
    v_id deg = RB->getVertexDegree(rand_v[i]);
  }
  pthread_barrier_wait(&thread_barrier);
  for (int i = ITERATIONS_PER_WARMUP; i < ITERATIONS_PER_WARMUP + MAX_ITERATIONS; i++) {
    if (thread_id == 0) {
      f_cat.open(dir + "/" + file + "_RB_deg", ios::app);
      start_time = std::chrono::high_resolution_clock::now();
    }
    v_id deg = RB->getVertexDegree(rand_v[i]);
    if (thread_id == 0) {
      end_time = std::chrono::high_resolution_clock::now();
      std::chrono::duration<double> elapsed_seconds = end_time-start_time;
      f_cat << GRAPH_S << " deg [us]: " << elapsed_seconds.count()*MICRO_SECONDS << endl;
      f_cat.close();
    }
  } 
  
}

void measure_N_RB(int thread_id, int num_threads, v_id* rand_v) {
  pthread_barrier_wait(&thread_barrier);
  for (int i = 0; i < ITERATIONS_PER_WARMUP; i++) {
    v_id* N = RB->getVertexNeighbors(rand_v[i]);
  }
  pthread_barrier_wait(&thread_barrier);
  for (int i = ITERATIONS_PER_WARMUP; i < ITERATIONS_PER_WARMUP + MAX_ITERATIONS; i++) {
    if (thread_id == 0) {
      f_cat.open(dir + "/" + file + "_RB_N", ios::app);
      start_time = std::chrono::high_resolution_clock::now();
    }
    v_id* N = RB->getVertexNeighbors(rand_v[i]);
    if (thread_id == 0) {
      end_time = std::chrono::high_resolution_clock::now();
      std::chrono::duration<double> elapsed_seconds = end_time-start_time;
      f_cat << GRAPH_S << " neigh [us]: " << elapsed_seconds.count()*MICRO_SECONDS << endl;
      f_cat.close();
    }
  } 
  
}

void measure_deg_BR(int thread_id, int num_threads, v_id* rand_v) {
  pthread_barrier_wait(&thread_barrier);
  for (int i = 0; i < ITERATIONS_PER_WARMUP; i++) {
    v_id deg = BR->getVertexDegree(rand_v[i]);
  }
  pthread_barrier_wait(&thread_barrier);
  for (int i = ITERATIONS_PER_WARMUP; i < ITERATIONS_PER_WARMUP + MAX_ITERATIONS; i++) {
    if (thread_id == 0) {
      f_cat.open(dir + "/" + file + "_BR_deg", ios::app);
      start_time = std::chrono::high_resolution_clock::now();
    }
    v_id deg = BR->getVertexDegree(rand_v[i]);
    if (thread_id == 0) {
      end_time = std::chrono::high_resolution_clock::now();
      std::chrono::duration<double> elapsed_seconds = end_time-start_time;
      f_cat << GRAPH_S << " deg [us]: " << elapsed_seconds.count()*MICRO_SECONDS << endl;
      f_cat.close();
    }
  } 
  
}

void measure_N_BR(int thread_id, int num_threads, v_id* rand_v) {
  pthread_barrier_wait(&thread_barrier);
  for (int i = 0; i < ITERATIONS_PER_WARMUP; i++) {
    v_id* N = BR->getVertexNeighbors(rand_v[i]);
  }
  pthread_barrier_wait(&thread_barrier);
  for (int i = ITERATIONS_PER_WARMUP; i < ITERATIONS_PER_WARMUP + MAX_ITERATIONS; i++) {
    if (thread_id == 0) {
      f_cat.open(dir + "/" + file + "_BR_N", ios::app);
      start_time = std::chrono::high_resolution_clock::now();
    }
    v_id* N = BR->getVertexNeighbors(rand_v[i]);
    if (thread_id == 0) {
      end_time = std::chrono::high_resolution_clock::now();
      std::chrono::duration<double> elapsed_seconds = end_time-start_time;
      f_cat << GRAPH_S << " neigh [us]: " << elapsed_seconds.count()*MICRO_SECONDS << endl;
      f_cat.close();
    }
  } 
  
}

void measure_deg_trad(int thread_id, int num_threads, v_id* rand_v) {
  pthread_barrier_wait(&thread_barrier);
  for (int i = 0; i < ITERATIONS_PER_WARMUP; i++) {
    v_id deg = rep_original_m->getVertexDegree(rand_v[i]);
  }
  pthread_barrier_wait(&thread_barrier);
  for (int i = ITERATIONS_PER_WARMUP; i < ITERATIONS_PER_WARMUP + MAX_ITERATIONS; i++) {
    if (thread_id == 0) {
      f_cat.open(dir + "/" + file + "_trad_deg", ios::app);
      start_time = std::chrono::high_resolution_clock::now();
    }
    v_id deg = rep_original_m->getVertexDegree(rand_v[i]);
    if (thread_id == 0) {
      end_time = std::chrono::high_resolution_clock::now();
      std::chrono::duration<double> elapsed_seconds = end_time-start_time;
      f_cat << GRAPH_S << " deg [us]: " << elapsed_seconds.count()*MICRO_SECONDS << endl;
      f_cat.close();
    }
  } 
  
}

void measure_N_trad(int thread_id, int num_threads, v_id* rand_v) {
  pthread_barrier_wait(&thread_barrier);
  for (int i = 0; i < ITERATIONS_PER_WARMUP; i++) {
    v_id* N = rep_original_m->getVertexNeighbors(rand_v[i]);
  }
  pthread_barrier_wait(&thread_barrier);
  for (int i = ITERATIONS_PER_WARMUP; i < ITERATIONS_PER_WARMUP + MAX_ITERATIONS; i++) {
    if (thread_id == 0) {
      f_cat.open(dir + "/" + file + "_trad_N", ios::app);
      start_time = std::chrono::high_resolution_clock::now();
    }
    v_id* N = rep_original_m->getVertexNeighbors(rand_v[i]);
    if (thread_id == 0) {
      end_time = std::chrono::high_resolution_clock::now();
      std::chrono::duration<double> elapsed_seconds = end_time-start_time;
      f_cat << GRAPH_S << " neigh [us]: " << elapsed_seconds.count()*MICRO_SECONDS << endl;
      f_cat.close();
    }
  } 
  
}

void* measure(void* args) {
  thread_params* params = (thread_params*)args;

  int thread_id = params->thread_id;
  int num_threads = params->num_threads;

  v_id* rand_v = new v_id[MAX_ITERATIONS + ITERATIONS_PER_WARMUP]();


  for(int64_t i = 0; i < (MAX_ITERATIONS + ITERATIONS_PER_WARMUP); ++i) {
    rand_v[i] = rand() % rep_original_t->n_;
  }

  if (thread_id == 0) {
    cout << "Measuring deg for DMd with " << num_threads << " threads" << endl;
  }
  measure_deg_DMd(thread_id, num_threads, rand_v);
  pthread_barrier_wait(&thread_barrier);

  if (thread_id == 0) {
    cout << "Measuring neigh for DMd with " << num_threads << " threads" << endl;
  }
  measure_N_DMd(thread_id, num_threads, rand_v);
  pthread_barrier_wait(&thread_barrier);

  if (thread_id == 0) {
    cout << "Measuring deg for DMf with " << num_threads << " threads" << endl;
  }
  measure_deg_DMf(thread_id, num_threads, rand_v);
  pthread_barrier_wait(&thread_barrier);

  if (thread_id == 0) {
    cout << "Measuring neigh for DMf with " << num_threads << " threads" << endl;
  }
  measure_N_DMf(thread_id, num_threads, rand_v);
  pthread_barrier_wait(&thread_barrier);

  if (thread_id == 0) {
    cout << "Measuring deg for RB with " << num_threads << " threads" << endl;
  }
  measure_deg_RB(thread_id, num_threads, rand_v);
  pthread_barrier_wait(&thread_barrier);

  if (thread_id == 0) {
    cout << "Measuring neigh for RB with " << num_threads << " threads" << endl;
  }
  measure_N_RB(thread_id, num_threads, rand_v);
  pthread_barrier_wait(&thread_barrier);

  if (thread_id == 0) {
    cout << "Measuring deg for BR with " << num_threads << " threads" << endl;
  }
  measure_deg_BR(thread_id, num_threads, rand_v);
  pthread_barrier_wait(&thread_barrier);

  if (thread_id == 0) {
    cout << "Measuring neigh for BR with " << num_threads << " threads" << endl;
  }
  measure_N_BR(thread_id, num_threads, rand_v);
  pthread_barrier_wait(&thread_barrier);

  if (thread_id == 0) {
    cout << "Measuring deg for trad with " << num_threads << " threads" << endl;
  }
  measure_deg_trad(thread_id, num_threads, rand_v);
  pthread_barrier_wait(&thread_barrier);

  if (thread_id == 0) {
    cout << "Measuring neigh for trad with " << num_threads << " threads" << endl;
  }
  measure_N_trad(thread_id, num_threads, rand_v);
  pthread_barrier_wait(&thread_barrier);

  free(params);

  delete [] rand_v;

  pthread_exit(NULL);
}


int main (int argc, char *argv[]) {
  GRAPH_S = argv[1];
  GRAPH_FILE_T = argv[2];
  GRAPH_FILE_R = argv[3];
  dir = argv[4];
  file = argv[5];
  assert(argc == 6);

  srand(time(NULL));

  if(GRAPH_FILE_T == "" || GRAPH_FILE_R == "") {
    return EXIT_FAILURE;
  }

  cout << "Loading original adjacency list from file...   ";
  GraphIO::loadGraph(GRAPH_FILE_T.c_str(), &rep_original_t, &rep_original_m);
  cout << "Done" << endl;

  cout << "Loading recursive bisectioning from file...   ";
  rep_bisected_r = GraphIO::readRecursivePartitioningFromFile(GRAPH_FILE_R);
  cout << "Done" << endl;

  f_cat.open(dir + "/" + file + "_size", ios::app);

  cout << "Constructing DMd...   ";
  s = std::chrono::high_resolution_clock::now();
  DMd = new DiffGraphR<OffsetsBV_RRR, MyVarintByteBasedCoder>(DEGREE_HIGH_TO_LOW, rep_bisected_r, rep_original_t);
  e = std::chrono::high_resolution_clock::now();
  std::chrono::duration<double> cnt = e - s; 
  f_cat << GRAPH_S << " construction of DMd: " << cnt.count() << " s" << endl;
  cout << "Done" << endl;

  double DMd_ADJ_SIZE = DMd->adj_data_total_size_in_bytes()/(1.0*1024*1024);
  double DMd_OFFSETS_SIZE = DMd->offsets_total_size_in_bytes()/(1.0*1024*1024);
  double DMd_TOTAL_SIZE = DMd_ADJ_SIZE + DMd_OFFSETS_SIZE; 
  double DMd_TOTAL_REDUN = DMd->adj_data_total_redundancy_in_bytes()/(1.0*1024*1024);
  double DMd_SIZE_NO_REDUN = DMd_TOTAL_SIZE - DMd_TOTAL_REDUN;

  f_cat << "DMd adj [MB]: " << DMd_ADJ_SIZE << endl;
  f_cat << "DMd off [MB]: " << DMd_OFFSETS_SIZE << endl;
  f_cat << "DMd tot [MB]: " << DMd_TOTAL_SIZE << endl;
  f_cat << "DMd red [MB]: " << DMd_TOTAL_REDUN << endl;
  f_cat << "DMd nor [MB]: " << DMd_SIZE_NO_REDUN << endl;

  cout << "Constructing DMf...   ";
  s = std::chrono::high_resolution_clock::now();
  DMf = new DiffGraphR<OffsetsBV_RRR, MyVarintByteBasedNoDiffCoder>(DEGREE_HIGH_TO_LOW, rep_bisected_r, rep_original_t);
  e = std::chrono::high_resolution_clock::now();
  std::chrono::duration<double> cnt = e - s; 
  f_cat << GRAPH_S << " construction of DMf: " << cnt.count() << " s" << endl;
  cout << "Done" << endl;

  double DMf_ADJ_SIZE = DMf->adj_data_total_size_in_bytes()/(1.0*1024*1024);
  double DMf_OFFSETS_SIZE = DMf->offsets_total_size_in_bytes()/(1.0*1024*1024);
  double DMf_TOTAL_SIZE = DMf_ADJ_SIZE + DMf_OFFSETS_SIZE; 
  double DMf_TOTAL_REDUN = DMf->adj_data_total_redundancy_in_bytes()/(1.0*1024*1024);
  double DMf_SIZE_NO_REDUN = DMf_TOTAL_SIZE - DMf_TOTAL_REDUN;

  f_cat << "DMf adj [MB]: " << DMf_ADJ_SIZE << endl;
  f_cat << "DMf off [MB]: " << DMf_OFFSETS_SIZE << endl;
  f_cat << "DMf tot [MB]: " << DMf_TOTAL_SIZE << endl;
  f_cat << "DMf red [MB]: " << DMf_TOTAL_REDUN << endl;
  f_cat << "DMf nor [MB]: " << DMf_SIZE_NO_REDUN << endl;
  
  cout << "Constructing RB...   ";
  s = std::chrono::high_resolution_clock::now();
  RB = new DiffGraphR<OffsetsBV_RRR, MyVarintByteBasedCoder>(INORDER, rep_bisected_r, rep_original_t);
  e = std::chrono::high_resolution_clock::now();
  std::chrono::duration<double> cnt = e - s; 
  f_cat << GRAPH_S << " construction of RB: " << cnt.count() << " s" << endl;
  cout << "Done" << endl;

  double RB_ADJ_SIZE = RB->adj_data_total_size_in_bytes()/(1.0*1024*1024);
  double RB_OFFSETS_SIZE = RB->offsets_total_size_in_bytes()/(1.0*1024*1024);
  double RB_TOTAL_SIZE = RB_ADJ_SIZE + RB_OFFSETS_SIZE; 
  double RB_TOTAL_REDUN = RB->adj_data_total_redundancy_in_bytes()/(1.0*1024*1024);
  double RB_SIZE_NO_REDUN = RB_TOTAL_SIZE - RB_TOTAL_REDUN;

  f_cat << "RB adj [MB]: " << RB_ADJ_SIZE << endl;
  f_cat << "RB off [MB]: " << RB_OFFSETS_SIZE << endl;
  f_cat << "RB tot [MB]: " << RB_TOTAL_SIZE << endl;
  f_cat << "RB red [MB]: " << RB_TOTAL_REDUN << endl;
  f_cat << "RB nor [MB]: " << RB_SIZE_NO_REDUN << endl;

  cout << "Constructing BR...   ";
  s = std::chrono::high_resolution_clock::now();
  BR = new DiffGraphR<OffsetsBV_RRR, MyVarintByteBasedRecCoder>(NO_CHANGES, rep_bisected_r, rep_original_t, hack);
  e = std::chrono::high_resolution_clock::now();
  std::chrono::duration<double> cnt = e - s; 
  f_cat << GRAPH_S << " construction of BR: " << cnt.count() << " s" << endl;
  cout << "Done" << endl;

  double BR_ADJ_SIZE = BR->adj_data_total_size_in_bytes()/(1.0*1024*1024);
  double BR_OFFSETS_SIZE = BR->offsets_total_size_in_bytes()/(1.0*1024*1024);
  double BR_TOTAL_SIZE = BR_ADJ_SIZE + BR_OFFSETS_SIZE; 
  double BR_TOTAL_REDUN = BR->adj_data_total_redundancy_in_bytes()/(1.0*1024*1024);
  double BR_SIZE_NO_REDUN = BR_TOTAL_SIZE - BR_TOTAL_REDUN;

  f_cat << "BR adj [MB]: " << BR_ADJ_SIZE << endl;
  f_cat << "BR off [MB]: " << BR_OFFSETS_SIZE << endl;
  f_cat << "BR tot [MB]: " << BR_TOTAL_SIZE << endl;
  f_cat << "BR red [MB]: " << BR_TOTAL_REDUN << endl;
  f_cat << "BR nor [MB]: " << BR_SIZE_NO_REDUN << endl;

  f_cat.close();

  for(int i = 1; i <= MAX_THREADS; i += 2) {
    pthread_attr_t attr;
    pthread_t* threads = new pthread_t[i];
    //for(int j = 0; j < i; ++j) {
    //  threads[j] = new pthread_t();
    //}
    void* status;

    pthread_attr_init(&attr);
    pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);  

    pthread_barrier_init(&thread_barrier, NULL, i);

    for(int j = 0; j < i; j++) {
      thread_params* params = (thread_params*) malloc(sizeof(thread_params));
      params->thread_id = j;
      params->num_threads = i;

      pthread_create(&threads[j], &attr, measure, (void*)params);
    }

    pthread_attr_destroy(&attr);

    for(int j = 0; j < i; j++) {
      pthread_join(threads[j], &status);
    }

    pthread_barrier_destroy(&thread_barrier);

    //for(int j = 0; j < i; ++j) {
    //  delete threads[j];
    //}
    delete [] threads;

    if (i == 1) {
      i++;
    }
  }

  for(auto& item: *rep_bisected_r) {
    string* result = item.second;
    delete result;
  }

  delete DMd;
  delete DMf;
  delete RB;
  delete BR;
  delete rep_original_t;
  delete rep_original_m;

  return EXIT_SUCCESS;
}

