#if !defined(__STATS_H__)
#define      __STATS_H__

#include "globals.h"
#include "utils/debug.h"
#include "papi.h"
#include "papi.h"
#include "liblsb.h"

using namespace std;

// Bundle of 64-bit event counters together with helpers that bump, total,
// reset and export them (the export helpers hand the values to
// LSB_Set_Rparam_int; see the record_*_in_lsb definitions in this header).
//
// The class owns no resources, so it follows the Rule of Zero: every counter
// starts at zero through its default member initializer and the implicitly
// generated copy/move/destroy operations are used.
class MemoryStats {
  public:
    // Raw counters, bumped through the increase_*() helpers.
    // NOTE(review): judging by the names, cas_* count compare-and-swap
    // outcomes and fad_ counts fetch-and-add operations -- confirm.
    uint64_t mem_reads_ = 0;
    uint64_t mem_writes_ = 0;
    uint64_t cas_succ_ = 0;
    uint64_t cas_unsucc_ = 0;
    uint64_t fad_ = 0;

    // Derived values; recomputed from the raw counters by
    // calculate_total_stats().
    uint64_t cas_total_ = 0;
    uint64_t fad_total_ = 0;
    uint64_t atomic_total_ = 0;
    uint64_t mem_total_ = 0;

    // Exported under the "omp_critical" key by the record_*_in_lsb helpers.
    uint64_t omp_critical_ = 0;

    // Exported by record_remote_stats_in_lsb().
    uint64_t local_fompi_gac_sum_ = 0;
    uint64_t remote_fompi_gac_sum_ = 0;

    // All members are zero-initialized by their default member initializers,
    // so no explicit zeroing is required at construction time.
    MemoryStats() = default;

    // Sets every counter (raw and derived) back to zero.
    inline __attribute__((always_inline)) void zero_all_stats();
    // Zeroes only the raw memory/atomic counters and omp_critical_; the
    // derived totals and the *_fompi_gac_sum_ members are left untouched.
    inline __attribute__((always_inline)) void zero_partial_stats();

    // Add the given amounts to the matching raw counters.
    inline __attribute__((always_inline)) void increase_mem_stats(uint64_t mem_reads, uint64_t mem_writes);
    inline __attribute__((always_inline)) void increase_atomic_stats(uint64_t cas_succ, uint64_t cas_unsucc, uint64_t fad);
    inline __attribute__((always_inline)) void increase_all_stats(uint64_t mem_reads, uint64_t mem_writes, uint64_t cas_succ, uint64_t cas_unsucc, uint64_t fad);

    // Recomputes cas_total_, fad_total_, atomic_total_ and mem_total_.
    inline __attribute__((always_inline)) void calculate_total_stats();

    // Defined outside this header.
    // NOTE(review): presumably these fold my_s into s -- confirm against the
    // definitions.
    static void increase_global_stats(MemoryStats* s, MemoryStats* my_s);
    static void increase_atomic_global_stats(MemoryStats* s, MemoryStats* my_s);
    static void increase_lock_global_stats(MemoryStats* s, MemoryStats* my_s);
    static void increase_remote_global_stats(MemoryStats* s, MemoryStats* my_s);

    // Export counters via LSB_Set_Rparam_int and reset the exported ones.
    inline __attribute__((always_inline)) void record_stats_in_lsb();
    inline __attribute__((always_inline)) void record_atomic_stats_in_lsb();
    inline __attribute__((always_inline)) void record_lock_stats_in_lsb();
    inline __attribute__((always_inline)) void record_remote_stats_in_lsb();
};

//////////////////////////////// PAPI-related functions:

// Wrapper around one set of PAPI hardware counters, selected by `set_id`.
// Constructor, destructor and the static members are defined outside this
// header; the inline members are defined further down in this file.
class PapiStats {
  public:
    PapiStats(int set_id);
    ~PapiStats();

    // Counter control: thin wrappers over PAPI_start_counters,
    // PAPI_read_counters and PAPI_stop_counters. Each one prints a message
    // and exits the process when PAPI does not return PAPI_OK.
    inline __attribute__((always_inline)) void start_papi_daint_counters();
    inline __attribute__((always_inline)) void read_papi_daint_counters();
    inline __attribute__((always_inline)) void stop_papi_daint_counters();

    // Exports `values` through LSB_Set_Rparam_int, then zeroes them.
    inline __attribute__((always_inline)) void record_papi_in_lsb();

    // TODO: this function is dangerous.
    // (It dereferences papi_daint_events[set_id] and writes through `values`
    // without any checks.)
    inline __attribute__((always_inline)) void zero_all_stats();

    // MUST be run at the beginning of the program, outside a parallel section.
    static void init_global_papi();
    static void increase_global_stats(PapiStats* s, PapiStats* my_s);

    // Daint-specific event lists, one per counter set, plus the lookup table
    // from set id to the matching list.
    static std::vector<int> events_daint_0;
    static std::vector<int> events_daint_1;
    static std::vector<int> events_daint_2;
    static std::vector<int> events_daint_3;
    static std::vector<int> events_daint_4;
    static std::map<int, std::vector<int>*> papi_daint_events;

    // Which counter set this object uses; -1 until the constructor assigns it.
    int set_id = -1;
    // Counter readings, indexed like the event list of `set_id`.
    // NOTE(review): presumably allocated by the constructor -- confirm.
    long long* values = nullptr;
    // Event codes handed to PAPI_start_counters.
    int* events = nullptr;

    // Disabled in-class initializers, kept for reference:
    //   static vector<int> events_daint_0 {PAPI_LD_INS, PAPI_SR_INS, PAPI_BR_INS, PAPI_BR_UCN, PAPI_BR_CN};
    //   static vector<int> events_daint_1 {PAPI_L1_TCM, PAPI_L2_TCM, PAPI_L3_TCM};
    //   static vector<int> events_daint_2 {PAPI_TLB_DM, PAPI_TLB_IM};
};

//////////////////////////// PAPI stuff.

// Writes zero into values[0 .. N-1], where N is the length of the event list
// registered for this object's set_id in papi_daint_events.
// NOTE(review): map::operator[] default-inserts a null entry for an unknown
// set_id, which would then be dereferenced here; `values` is not checked
// either. Presumably the constructor guarantees both -- confirm.
inline __attribute__((always_inline)) void PapiStats::zero_all_stats() {
  const auto event_count = papi_daint_events[set_id]->size();
  unsigned int idx = 0;
  while (idx < event_count) {
    values[idx] = 0;
    ++idx;
  }
}

// Exports the counter readings of the active set through LSB_Set_Rparam_int,
// one call per counter in `values` order, and then zeroes the readings.
// An unknown set_id trips assert(false); when asserts are compiled out nothing
// is exported and only the reset runs.
// (An earlier, disabled variant derived the labels at run time with
// PAPI_event_code_to_name instead of using fixed names.)
inline __attribute__((always_inline)) void PapiStats::record_papi_in_lsb() {
  // Labels per counter set; position i names values[i].
  static const char* const kSet0Labels[] = {
    "PAPI_LD_INS", "PAPI_SR_INS", "PAPI_BR_INS", "PAPI_BR_UCN", "PAPI_BR_CN"};
  static const char* const kSet1Labels[] = {
    "PAPI_L1_TCM", "PAPI_L2_TCM", "PAPI_L3_TCM"};
  static const char* const kSet2Labels[] = {
    "PAPI_TLB_DM", "PAPI_TLB_IM"};
  static const char* const kSet3Labels[] = {
    "PAPI_CA_SNP", "PAPI_CA_INV", "PAPI_PRF_DM", "PAPI_MEM_WCY", "PAPI_STL_ICY",
    "PAPI_RES_STL", "PAPI_TOT_INS", "PAPI_BR_MSP", "PAPI_BR_PRC"};
  static const char* const kSet4Labels[] = {
    "PAPI_L1_DCM", "PAPI_L1_ICM", "PAPI_L2_DCM", "PAPI_L2_ICM"};

  const char* const* labels = nullptr;
  unsigned int label_count = 0;
  switch (set_id) {
    case 0:
      labels = kSet0Labels;
      label_count = sizeof(kSet0Labels) / sizeof(kSet0Labels[0]);
      break;
    case 1:
      labels = kSet1Labels;
      label_count = sizeof(kSet1Labels) / sizeof(kSet1Labels[0]);
      break;
    case 2:
      labels = kSet2Labels;
      label_count = sizeof(kSet2Labels) / sizeof(kSet2Labels[0]);
      break;
    case 3:
      labels = kSet3Labels;
      label_count = sizeof(kSet3Labels) / sizeof(kSet3Labels[0]);
      break;
    case 4:
      labels = kSet4Labels;
      label_count = sizeof(kSet4Labels) / sizeof(kSet4Labels[0]);
      break;
    default:
      assert(false);
      break;
  }

  for (unsigned int slot = 0; slot < label_count; ++slot) {
    LSB_Set_Rparam_int(labels[slot], values[slot]);
  }

  zero_all_stats();
}

// Starts the PAPI counters listed in `events`; the number of counters is the
// length of the event list registered for set_id. Any result other than
// PAPI_OK is reported on stdout and terminates the process.
inline __attribute__((always_inline)) void PapiStats::start_papi_daint_counters() {
  // operator[] would insert a null entry for an unknown set_id; the assert
  // catches that case in debug builds before the pointer is dereferenced.
  const auto event_list = papi_daint_events[set_id];
  assert(event_list != NULL);

  const int status = PAPI_start_counters(events, event_list->size());
  if (status == PAPI_OK) {
    return;
  }

  std::cout << "PAPI failed to start counters: " << PAPI_strerror(status) << std::endl;
  exit(EXIT_FAILURE);
}

// Reads the running PAPI counters into `values` via PAPI_read_counters; the
// count passed is the length of the event list registered for set_id. Any
// result other than PAPI_OK is reported on stdout and terminates the process.
// NOTE(review): unlike start_papi_daint_counters(), the map entry is not
// asserted to be non-null before it is dereferenced.
inline __attribute__((always_inline)) void PapiStats::read_papi_daint_counters() {
  const auto event_count = papi_daint_events[set_id]->size();
  const int status = PAPI_read_counters(values, event_count);
  if (status == PAPI_OK) {
    return;
  }

  std::cout << "PAPI failed to read counters: " << PAPI_strerror(status) << std::endl;
  exit(EXIT_FAILURE);
}

// Stops the PAPI counters via PAPI_stop_counters, passing `values` as the
// output buffer and the length of the event list registered for set_id as the
// count. Any result other than PAPI_OK is reported on stdout and terminates
// the process.
// NOTE(review): unlike start_papi_daint_counters(), the map entry is not
// asserted to be non-null before it is dereferenced.
inline __attribute__((always_inline)) void PapiStats::stop_papi_daint_counters() {
  const auto event_count = papi_daint_events[set_id]->size();
  const int status = PAPI_stop_counters(values, event_count);
  if (status == PAPI_OK) {
    return;
  }

  std::cout << "PAPI failed to stop counters: " << PAPI_strerror(status) << std::endl;
  exit(EXIT_FAILURE);
}

/////////////////// End of PAPI stuff.

// Refreshes the derived totals, exports the memory, atomic and omp_critical
// counters through LSB_Set_Rparam_int (in the order listed below), and then
// zeroes every counter of this object.
// NOTE(review): the counters are 64-bit; if LSB_Set_Rparam_int takes an int
// the values are narrowed on the way out -- confirm against liblsb.
inline __attribute__((always_inline)) void MemoryStats::record_stats_in_lsb() {
  calculate_total_stats();

  const struct {
    const char* label;
    uint64_t count;
  } columns[] = {
    {"mem_reads", mem_reads_},
    {"mem_writes", mem_writes_},
    {"mem_total", mem_total_},
    {"cas_succ", cas_succ_},
    {"cas_unsucc", cas_unsucc_},
    {"fad", fad_total_},
    {"atomic_total", atomic_total_},
    {"omp_critical", omp_critical_},
  };
  for (const auto& column : columns) {
    LSB_Set_Rparam_int(column.label, column.count);
  }

  zero_all_stats();
}

// Refreshes the derived totals, exports only the atomic-operation counters
// through LSB_Set_Rparam_int, and then zeroes every counter of this object
// (not just the exported ones).
// NOTE(review): the counters are 64-bit; if LSB_Set_Rparam_int takes an int
// the values are narrowed on the way out -- confirm against liblsb.
inline __attribute__((always_inline)) void MemoryStats::record_atomic_stats_in_lsb() {
  calculate_total_stats();

  const struct {
    const char* label;
    uint64_t count;
  } columns[] = {
    {"cas_succ", cas_succ_},
    {"cas_unsucc", cas_unsucc_},
    {"fad", fad_total_},
    {"atomic_total", atomic_total_},
  };
  for (const auto& column : columns) {
    LSB_Set_Rparam_int(column.label, column.count);
  }

  zero_all_stats();
}

// Exports omp_critical_ under the "omp_critical" key through
// LSB_Set_Rparam_int and resets that single counter; all other counters are
// left as they are.
inline __attribute__((always_inline)) void MemoryStats::record_lock_stats_in_lsb() {
  const uint64_t critical_count = omp_critical_;
  omp_critical_ = 0;
  LSB_Set_Rparam_int("omp_critical", critical_count);
}

// Exports the two *_fompi_gac_sum_ counters through LSB_Set_Rparam_int (local
// first, then remote) and resets just those two; all other counters are left
// as they are.
inline __attribute__((always_inline)) void MemoryStats::record_remote_stats_in_lsb() {
  const uint64_t local_sum = local_fompi_gac_sum_;
  const uint64_t remote_sum = remote_fompi_gac_sum_;
  local_fompi_gac_sum_ = remote_fompi_gac_sum_ = 0;

  LSB_Set_Rparam_int("local_fompi_gac_sum", local_sum);
  LSB_Set_Rparam_int("remote_fompi_gac_sum", remote_sum);
}

// Resets every counter of this object -- raw counters, derived totals,
// omp_critical_ and both *_fompi_gac_sum_ members -- to zero.
inline __attribute__((always_inline)) void MemoryStats::zero_all_stats() {
  // Raw memory and atomic counters.
  mem_reads_ = mem_writes_ = 0;
  cas_succ_ = cas_unsucc_ = fad_ = 0;

  // Derived totals.
  cas_total_ = fad_total_ = atomic_total_ = mem_total_ = 0;

  // Remaining counters.
  omp_critical_ = 0;
  local_fompi_gac_sum_ = remote_fompi_gac_sum_ = 0;
}

// Resets only the raw memory/atomic counters and omp_critical_. The derived
// totals and both *_fompi_gac_sum_ members keep their current values.
inline __attribute__((always_inline)) void MemoryStats::zero_partial_stats() {
  mem_reads_ = mem_writes_ = 0;
  cas_succ_ = cas_unsucc_ = fad_ = 0;
  omp_critical_ = 0;
}

inline  __attribute__((always_inline)) void MemoryStats::increase_mem_stats(uint64_t mem_reads, uint64_t mem_writes) {
  mem_reads_ += mem_reads;
  mem_writes_ += mem_writes;
}

inline  __attribute__((always_inline)) void MemoryStats::increase_atomic_stats(uint64_t cas_succ, uint64_t cas_unsucc, uint64_t fad) {
  cas_succ_ += cas_succ;
  cas_unsucc_ += cas_unsucc;
  fad_ += fad;
}

inline  __attribute__((always_inline)) void MemoryStats::increase_all_stats(uint64_t mem_reads, uint64_t mem_writes, uint64_t cas_succ, uint64_t cas_unsucc, uint64_t fad) {
  mem_reads_ += mem_reads;
  mem_writes_ += mem_writes;
  cas_succ_ += cas_succ;
  cas_unsucc_ += cas_unsucc;
  fad_ += fad;
}

// Recomputes the derived totals from the raw counters:
//   mem_total_    = reads + writes
//   cas_total_    = successful + unsuccessful CAS
//   fad_total_    = fad_
//   atomic_total_ = cas_total_ + fad_total_
inline __attribute__((always_inline)) void MemoryStats::calculate_total_stats() {
  mem_total_ = mem_reads_ + mem_writes_;

  fad_total_ = fad_;
  cas_total_ = cas_unsucc_ + cas_succ_;
  // Must come last: it is built from the two totals computed just above.
  atomic_total_ = fad_total_ + cas_total_;
}

#endif
