#include <cinttypes>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <pthread.h>
//#include <mpi.h>
#include <cassert>
#include <chrono>
#include <iostream>

#include "globals.h"

#include "utils/graphIO.h"
#include "utils/debug.h"
#include "utils/diffAnalysis.h"

#include "representations/tradListGraphR.h"
#include "representations/metisR.h"
#include "representations/diffGraphR.h"

#include "coders/myVarintByteBasedCoder.h"

#include "offsetStructures/offsetsPtrs.h"
#include "offsetStructures/offsetsPtrsCompressed.h"
#include "offsetStructures/offsetsBV.h"
#include "offsetStructures/offsetsBVCompressed.h"

#include "offsetStructures/offsetsPtrsLogn.h"
#include "offsetStructures/offsetsBV.h"
#include "offsetStructures/offsetsBV_IL.h"
#include "offsetStructures/offsetsBV_SD.h"
#include "offsetStructures/offsetsBV_RRR.h"

// Diff-graph instances under test, one per offset-structure type. All use the
// same MyVarintByteBasedCoder; only the Offsets* template argument differs.
// They are created and destroyed in main().
DiffGraphR<OffsetsBV_RRR, MyVarintByteBasedCoder>* dif_B_bvRRR;
DiffGraphR<OffsetsBV_SD, MyVarintByteBasedCoder>* dif_B_bvSD;
DiffGraphR<OffsetsBV_IL, MyVarintByteBasedCoder>* dif_B_bvIL;
DiffGraphR<OffsetsBV, MyVarintByteBasedCoder>* dif_B_bv;
// NOTE(review): dif_B_ptr64 and dif_B_ptrLogn are declared but never assigned
// or queried in the code visible in this file.
DiffGraphR<OffsetsPtrs, MyVarintByteBasedCoder>* dif_B_ptr64;
DiffGraphR<OffsetsPtrsLogn, MyVarintByteBasedCoder>* dif_B_ptrLogn;
DiffGraphR<OffsetsPtrs, MyVarintByteBasedCoder>* dif_B;
DiffGraphR<OffsetsPtrsCompressed, MyVarintByteBasedCoder>* dif_B_com;
DiffGraphR<OffsetsBVCompressed, MyVarintByteBasedCoder>* dif_B_bv_com;

// Command-line supplied strings (set in main() from argv[1..4]).
// GRAPH_S is echoed as the first field of every result line; GRAPH_FILE_T and
// GRAPH_FILE_R are the input paths handed to GraphIO.
std::string GRAPH_S;
std::string GRAPH_NAME;
std::string GRAPH_FILE_T;
std::string GRAPH_FILE_R;

// Result file; main() opens it in append mode and thread 0 writes one line per
// measurement from measure().
ofstream f_cat;

// Input representations: rep_original_t / rep_original_m are filled by
// GraphIO::loadGraph, rep_bisected_r by
// GraphIO::readRecursivePartitioningFromFile.
TradListGraphR* rep_original_t = NULL;
MetisR* rep_original_m = NULL;
SimpleRecGraphR* rep_bisected_r = NULL;

// Benchmark parameters. The values below are only defaults: main() overwrites
// all three from argv[7..9].
int MAX_ITERATIONS = 10000;
int ITERATIONS_PER_WARMUP = 100;
int MAX_THREADS = 1;

// thread_barrier keeps the worker threads in lockstep around each measurement;
// main() initializes it with the number of workers.
// thread_mutex is initialized and destroyed in main() but never locked in the
// code visible in this file.
pthread_barrier_t thread_barrier;
pthread_mutex_t thread_mutex;

// Argument record passed to each worker thread through pthread_create().
// main() allocates one per worker with malloc(); measure() releases it.
struct thread_params_t {
  int thread_id;    // index of the worker, starting at 0
  int num_threads;  // number of workers taking part in the same run
};

using thread_params = thread_params_t;

// Bounds of the timed (post-warmup) phase of the measurement currently
// running. Thread 0 stores start_time inside the measure_query_* functions and
// end_time inside measure(); no other thread accesses them.
//
// The type is spelled through high_resolution_clock because every assignment
// uses high_resolution_clock::now(). Declaring these as system_clock time
// points only compiles on standard libraries where both clocks happen to be
// the same type.
std::chrono::high_resolution_clock::time_point start_time, end_time;

////////////////////////////////////////////////////////////

// Vertex ids queried by the benchmark loops. main() allocates the array with
// ITERATIONS_PER_WARMUP + MAX_ITERATIONS entries and fills it with
// pseudo-random values before any worker thread starts.
int64_t* rand_v = NULL;

////////////////////////////////////////////////////////////

// Runs the getVertexOffset() benchmark loop against dif_B (OffsetsPtrs).
//
// Every calling thread issues the same query sequence rand_v[0], rand_v[1], ...
// The first ITERATIONS_PER_WARMUP queries are untimed; before the remaining
// MAX_ITERATIONS queries all threads meet at thread_barrier and thread 0
// records start_time. The matching end_time is taken by the caller.
//
// thread_id: index of the calling worker; only worker 0 touches start_time.
void measure_query_ptrs64(int thread_id) {
  // Line all workers up before anyone issues the first query.
  pthread_barrier_wait(&thread_barrier);

  const int total_queries = ITERATIONS_PER_WARMUP + MAX_ITERATIONS;
  int q = 0;

  // Warmup phase.
  while (q < total_queries && q < ITERATIONS_PER_WARMUP) {
    dif_B->offsets_->getVertexOffset(rand_v[q]);
    ++q;
  }

  // Start of the timed phase; skipped when there is no query left to time
  // or when the warmup boundary is never reached.
  if (q == ITERATIONS_PER_WARMUP && q < total_queries) {
    pthread_barrier_wait(&thread_barrier);
    if (thread_id == 0) {
      start_time = std::chrono::high_resolution_clock::now();
    }
  }

  // Timed phase.
  while (q < total_queries) {
    dif_B->offsets_->getVertexOffset(rand_v[q]);
    ++q;
  }
}

// Runs the getVertexOffset() benchmark loop against dif_B_com
// (OffsetsPtrsCompressed).
//
// Every calling thread issues the same query sequence rand_v[0], rand_v[1], ...
// The first ITERATIONS_PER_WARMUP queries are untimed; before the remaining
// MAX_ITERATIONS queries all threads meet at thread_barrier and thread 0
// records start_time. The matching end_time is taken by the caller.
//
// thread_id: index of the calling worker; only worker 0 touches start_time.
void measure_query_ptrs64C(int thread_id) {
  // Line all workers up before anyone issues the first query.
  pthread_barrier_wait(&thread_barrier);

  const int total_queries = ITERATIONS_PER_WARMUP + MAX_ITERATIONS;
  int q = 0;

  // Warmup phase.
  while (q < total_queries && q < ITERATIONS_PER_WARMUP) {
    dif_B_com->offsets_->getVertexOffset(rand_v[q]);
    ++q;
  }

  // Start of the timed phase; skipped when there is no query left to time
  // or when the warmup boundary is never reached.
  if (q == ITERATIONS_PER_WARMUP && q < total_queries) {
    pthread_barrier_wait(&thread_barrier);
    if (thread_id == 0) {
      start_time = std::chrono::high_resolution_clock::now();
    }
  }

  // Timed phase.
  while (q < total_queries) {
    dif_B_com->offsets_->getVertexOffset(rand_v[q]);
    ++q;
  }
}

// Runs the getVertexOffset() benchmark loop against dif_B_bv (OffsetsBV).
//
// Every calling thread issues the same query sequence rand_v[0], rand_v[1], ...
// The first ITERATIONS_PER_WARMUP queries are untimed; before the remaining
// MAX_ITERATIONS queries all threads meet at thread_barrier and thread 0
// records start_time. The matching end_time is taken by the caller.
//
// thread_id: index of the calling worker; only worker 0 touches start_time.
void measure_query_bv(int thread_id) {
  // Line all workers up before anyone issues the first query.
  pthread_barrier_wait(&thread_barrier);

  const int total_queries = ITERATIONS_PER_WARMUP + MAX_ITERATIONS;
  int q = 0;

  // Warmup phase.
  while (q < total_queries && q < ITERATIONS_PER_WARMUP) {
    dif_B_bv->offsets_->getVertexOffset(rand_v[q]);
    ++q;
  }

  // Start of the timed phase; skipped when there is no query left to time
  // or when the warmup boundary is never reached.
  if (q == ITERATIONS_PER_WARMUP && q < total_queries) {
    pthread_barrier_wait(&thread_barrier);
    if (thread_id == 0) {
      start_time = std::chrono::high_resolution_clock::now();
    }
  }

  // Timed phase.
  while (q < total_queries) {
    dif_B_bv->offsets_->getVertexOffset(rand_v[q]);
    ++q;
  }
}

// Runs the getVertexOffset() benchmark loop against dif_B_bv_com
// (OffsetsBVCompressed).
//
// Every calling thread issues the same query sequence rand_v[0], rand_v[1], ...
// The first ITERATIONS_PER_WARMUP queries are untimed; before the remaining
// MAX_ITERATIONS queries all threads meet at thread_barrier and thread 0
// records start_time. The matching end_time is taken by the caller.
//
// thread_id: index of the calling worker; only worker 0 touches start_time.
void measure_query_bvC(int thread_id) {
  // Line all workers up before anyone issues the first query.
  pthread_barrier_wait(&thread_barrier);

  const int total_queries = ITERATIONS_PER_WARMUP + MAX_ITERATIONS;
  int q = 0;

  // Warmup phase.
  while (q < total_queries && q < ITERATIONS_PER_WARMUP) {
    dif_B_bv_com->offsets_->getVertexOffset(rand_v[q]);
    ++q;
  }

  // Start of the timed phase; skipped when there is no query left to time
  // or when the warmup boundary is never reached.
  if (q == ITERATIONS_PER_WARMUP && q < total_queries) {
    pthread_barrier_wait(&thread_barrier);
    if (thread_id == 0) {
      start_time = std::chrono::high_resolution_clock::now();
    }
  }

  // Timed phase.
  while (q < total_queries) {
    dif_B_bv_com->offsets_->getVertexOffset(rand_v[q]);
    ++q;
  }
}

// Runs the getVertexOffset() benchmark loop against dif_B_bvIL (OffsetsBV_IL).
//
// Every calling thread issues the same query sequence rand_v[0], rand_v[1], ...
// The first ITERATIONS_PER_WARMUP queries are untimed; before the remaining
// MAX_ITERATIONS queries all threads meet at thread_barrier and thread 0
// records start_time. The matching end_time is taken by the caller.
//
// thread_id: index of the calling worker; only worker 0 touches start_time.
void measure_query_bvIL(int thread_id) {
  // Line all workers up before anyone issues the first query.
  pthread_barrier_wait(&thread_barrier);

  const int total_queries = ITERATIONS_PER_WARMUP + MAX_ITERATIONS;
  int q = 0;

  // Warmup phase.
  while (q < total_queries && q < ITERATIONS_PER_WARMUP) {
    dif_B_bvIL->offsets_->getVertexOffset(rand_v[q]);
    ++q;
  }

  // Start of the timed phase; skipped when there is no query left to time
  // or when the warmup boundary is never reached.
  if (q == ITERATIONS_PER_WARMUP && q < total_queries) {
    pthread_barrier_wait(&thread_barrier);
    if (thread_id == 0) {
      start_time = std::chrono::high_resolution_clock::now();
    }
  }

  // Timed phase.
  while (q < total_queries) {
    dif_B_bvIL->offsets_->getVertexOffset(rand_v[q]);
    ++q;
  }
}

// Runs the getVertexOffset() benchmark loop against dif_B_bvSD (OffsetsBV_SD).
//
// Every calling thread issues the same query sequence rand_v[0], rand_v[1], ...
// The first ITERATIONS_PER_WARMUP queries are untimed; before the remaining
// MAX_ITERATIONS queries all threads meet at thread_barrier and thread 0
// records start_time. The matching end_time is taken by the caller.
//
// thread_id: index of the calling worker; only worker 0 touches start_time.
void measure_query_bvSD(int thread_id) {
  // Line all workers up before anyone issues the first query.
  pthread_barrier_wait(&thread_barrier);

  const int total_queries = ITERATIONS_PER_WARMUP + MAX_ITERATIONS;
  int q = 0;

  // Warmup phase.
  while (q < total_queries && q < ITERATIONS_PER_WARMUP) {
    dif_B_bvSD->offsets_->getVertexOffset(rand_v[q]);
    ++q;
  }

  // Start of the timed phase; skipped when there is no query left to time
  // or when the warmup boundary is never reached.
  if (q == ITERATIONS_PER_WARMUP && q < total_queries) {
    pthread_barrier_wait(&thread_barrier);
    if (thread_id == 0) {
      start_time = std::chrono::high_resolution_clock::now();
    }
  }

  // Timed phase.
  while (q < total_queries) {
    dif_B_bvSD->offsets_->getVertexOffset(rand_v[q]);
    ++q;
  }
}

// Runs the getVertexOffset() benchmark loop against dif_B_bvRRR
// (OffsetsBV_RRR).
//
// Every calling thread issues the same query sequence rand_v[0], rand_v[1], ...
// The first ITERATIONS_PER_WARMUP queries are untimed; before the remaining
// MAX_ITERATIONS queries all threads meet at thread_barrier and thread 0
// records start_time. The matching end_time is taken by the caller.
//
// thread_id: index of the calling worker; only worker 0 touches start_time.
void measure_query_bvRRR(int thread_id) {
  // Line all workers up before anyone issues the first query.
  pthread_barrier_wait(&thread_barrier);

  const int total_queries = ITERATIONS_PER_WARMUP + MAX_ITERATIONS;
  int q = 0;

  // Warmup phase.
  while (q < total_queries && q < ITERATIONS_PER_WARMUP) {
    dif_B_bvRRR->offsets_->getVertexOffset(rand_v[q]);
    ++q;
  }

  // Start of the timed phase; skipped when there is no query left to time
  // or when the warmup boundary is never reached.
  if (q == ITERATIONS_PER_WARMUP && q < total_queries) {
    pthread_barrier_wait(&thread_barrier);
    if (thread_id == 0) {
      start_time = std::chrono::high_resolution_clock::now();
    }
  }

  // Timed phase.
  while (q < total_queries) {
    dif_B_bvRRR->offsets_->getVertexOffset(rand_v[q]);
    ++q;
  }
}


void* measure(void* args) {
  thread_params* params = (thread_params*)args;

  int thread_id = params->thread_id;
  int num_threads = params->num_threads;

  if(thread_id == 0) {
    cerr << ">>> start ptrs64: " << endl;
  }

  measure_query_ptrs64(thread_id);
  pthread_barrier_wait(&thread_barrier);

  if(thread_id == 0) {
    end_time = std::chrono::high_resolution_clock::now();
    std::chrono::duration<double> elapsed_seconds = end_time-start_time;
    f_cat << GRAPH_S << " " << num_threads << " ptrs64 " << elapsed_seconds.count() << " " << elapsed_seconds.count() / MAX_ITERATIONS << endl;
  }


  if(thread_id == 0) {
    cerr << ">>> start ptrs64C: " << endl;
  }

  measure_query_ptrs64C(thread_id);
  pthread_barrier_wait(&thread_barrier);

  if(thread_id == 0) {
    end_time = std::chrono::high_resolution_clock::now();
    std::chrono::duration<double> elapsed_seconds = end_time-start_time;
    f_cat << GRAPH_S << " " << num_threads << " ptrs64C " << elapsed_seconds.count() << " " << elapsed_seconds.count() / MAX_ITERATIONS << endl;
  }


  if(thread_id == 0) {
    cerr << ">>> start bv: " << endl;
  }

  measure_query_bv(thread_id);
  pthread_barrier_wait(&thread_barrier);

  if(thread_id == 0) {
    end_time = std::chrono::high_resolution_clock::now();
    std::chrono::duration<double> elapsed_seconds = end_time-start_time;
    f_cat << GRAPH_S << " " << num_threads << " bvUN " << elapsed_seconds.count() << " " << elapsed_seconds.count() / MAX_ITERATIONS << endl;
  }


  if(thread_id == 0) {
    cerr << ">>> start bvC: " << endl;
  }

  measure_query_bvC(thread_id);
  pthread_barrier_wait(&thread_barrier);

  if(thread_id == 0) {
    end_time = std::chrono::high_resolution_clock::now();
    std::chrono::duration<double> elapsed_seconds = end_time-start_time;
    f_cat << GRAPH_S << " " << num_threads << " bvC " << elapsed_seconds.count() << " " << elapsed_seconds.count() / MAX_ITERATIONS << endl;
  }


  if(thread_id == 0) {
    cerr << ">>> start bvIL: " << endl;
  }

  measure_query_bvIL(thread_id);
  pthread_barrier_wait(&thread_barrier);

  if(thread_id == 0) {
    end_time = std::chrono::high_resolution_clock::now();
    std::chrono::duration<double> elapsed_seconds = end_time-start_time;
    f_cat << GRAPH_S << " " << num_threads << " bvIL " << elapsed_seconds.count() << " " << elapsed_seconds.count() / MAX_ITERATIONS << endl;
  }


  if(thread_id == 0) {
    cerr << ">>> start bvSD: " << endl;
  }

  measure_query_bvSD(thread_id);
  pthread_barrier_wait(&thread_barrier);

  if(thread_id == 0) {
    end_time = std::chrono::high_resolution_clock::now();
    std::chrono::duration<double> elapsed_seconds = end_time-start_time;
    f_cat << GRAPH_S << " " << num_threads << " bvSD " << elapsed_seconds.count() << " " << elapsed_seconds.count() / MAX_ITERATIONS << endl;
  }


  if(thread_id == 0) {
    cerr << ">>> start bvRRR: " << endl;
  }

  measure_query_bvRRR(thread_id);
  pthread_barrier_wait(&thread_barrier);

  if(thread_id == 0) {
    end_time = std::chrono::high_resolution_clock::now();
    std::chrono::duration<double> elapsed_seconds = end_time-start_time;
    f_cat << GRAPH_S << " " << num_threads << " bvRRR " << elapsed_seconds.count() << " " << elapsed_seconds.count() / MAX_ITERATIONS << endl;
  }


  free(params);
  pthread_exit(NULL);
}

int main (int argc, char *argv[]) {
  GRAPH_S = argv[1];
  GRAPH_NAME = argv[2];
  GRAPH_FILE_T = argv[3];
  GRAPH_FILE_R = argv[4];

  string dir = argv[5];
  string file = argv[6];

  if(GRAPH_FILE_T == "" || GRAPH_FILE_R == "") {
    return EXIT_FAILURE;
  }

  MAX_THREADS = atoi(argv[7]);
  MAX_ITERATIONS = atoi(argv[8]);
  ITERATIONS_PER_WARMUP = atoi(argv[9]);

  assert(argc == 10);

  srand(time(NULL));

  GraphIO::loadGraph(GRAPH_FILE_T.c_str(), &rep_original_t, &rep_original_m);
  rep_bisected_r = GraphIO::readRecursivePartitioningFromFile(GRAPH_FILE_R);

  rand_v = new int64_t[MAX_ITERATIONS + ITERATIONS_PER_WARMUP]();

  for(int64_t i = 0; i < (MAX_ITERATIONS + ITERATIONS_PER_WARMUP); ++i) {
    rand_v[i] = rand() % rep_original_t->n_;
  }

  dif_B = new DiffGraphR<OffsetsPtrs, MyVarintByteBasedCoder>(NO_CHANGES, rep_bisected_r, rep_original_t);
  dif_B_com = new DiffGraphR<OffsetsPtrsCompressed, MyVarintByteBasedCoder>(NO_CHANGES, rep_bisected_r, rep_original_t);
  dif_B_bv = new DiffGraphR<OffsetsBV, MyVarintByteBasedCoder>(NO_CHANGES, rep_bisected_r, rep_original_t);
  dif_B_bv_com = new DiffGraphR<OffsetsBVCompressed, MyVarintByteBasedCoder>(NO_CHANGES, rep_bisected_r, rep_original_t);
  dif_B_bvRRR   = new DiffGraphR<OffsetsBV_RRR, MyVarintByteBasedCoder>(NO_CHANGES, rep_bisected_r, rep_original_t);
  dif_B_bvSD    = new DiffGraphR<OffsetsBV_SD, MyVarintByteBasedCoder>(NO_CHANGES, rep_bisected_r, rep_original_t);
  dif_B_bvIL    = new DiffGraphR<OffsetsBV_IL, MyVarintByteBasedCoder>(NO_CHANGES, rep_bisected_r, rep_original_t);
  dif_B_bv    = new DiffGraphR<OffsetsBV, MyVarintByteBasedCoder>(NO_CHANGES, rep_bisected_r, rep_original_t);

     f_cat.open(dir + "/" + file, ios::app);

  for(int i = MAX_THREADS; i <= MAX_THREADS; i *= 2) {
    pthread_attr_t attr;
    pthread_t* threads = new pthread_t[i];
    //for(int j = 0; j < i; ++j) {
    //  threads[j] = new pthread_t();
    //}
    void* status;

    pthread_attr_init(&attr);
    pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);  

    pthread_barrier_init(&thread_barrier, NULL, i);
    pthread_mutex_init(&thread_mutex, NULL);

    for(int j = 0; j < i; j++) {
      thread_params* params = (thread_params*) malloc(sizeof(thread_params));
      params->thread_id = j;
      params->num_threads = i;

      pthread_create(&threads[j], &attr, measure, (void*)params);
    }

    pthread_attr_destroy(&attr);

    for(int j = 0; j < i; j++) {
      pthread_join(threads[j], &status);
    }

    pthread_barrier_destroy(&thread_barrier);
    pthread_mutex_destroy(&thread_mutex);

    //for(int j = 0; j < i; ++j) {
    //  delete threads[j];
    //}
    delete [] threads;
  }

  f_cat.close();

  for(auto& item: *rep_bisected_r) {
    string* result = item.second;
    delete result;
  }

  delete rep_original_t;
  delete rep_original_m;

  delete dif_B;
  delete dif_B_com;
  delete dif_B_bv;
  delete dif_B_bv_com;
  delete dif_B_bvRRR;  
  delete dif_B_bvSD;  
  delete dif_B_bvIL; 

  delete [] rand_v;

  return EXIT_SUCCESS;
}
