#pragma warning (disable:4996)

#include <map>
#include <string>
#include <vector>
using namespace std;


// Top-level entry points; only the prototypes are visible in this header.
// NOTE(review): parameter meanings are inferred from their names (dataset name,
// target attribute/value, max pattern length, min support, max p-value,
// min effect size, analysis option, output name) — confirm against the definitions.
void AnalyzeHypotheses(char* szdataset_name, char* sztarget_attr, char* sztarget_value, int nmax_len, double dmin_sup, double dmax_pvalue, double dmin_effect_size, char* szanalyze_option, char* szoutput_name);
// Takes no arguments and returns nothing; presumably an experiment driver that
// varies the minimum support — TODO confirm.
void VaryMinSup();


// Bit positions (0 and 1) of two flags.
// NOTE(review): presumably bits of PAT_DIR_NODE::nflag — confirm at the use sites.
#define CLOSED_FLAG_BITPOS 0
#define DIFF_LIST_FLAG_BITPOS 1

// Integer selector codes.
// NOTE(review): presumably the accepted values of the `neffect_size_method`
// parameters declared further down — confirm.
#define CONFIDENCE 0
#define ODDS_RATIO 1

// Integer codes 0..9 naming permutation-based variants.
// NOTE(review): presumably the accepted values of the `ncorrection_method`
// parameters declared further down — confirm at the use sites.
#define PERMUTATION 0
#define PERMUTATION_SUPERSET 1
#define SIMULATED_PERM 2
#define SIMULATED_PERM_SUBSET 3
#define SIMULATED_PERM_SUBSET_SIB 4
#define SIMULATED_PERM_SUBSET_LEFT_SIB 5
#define SIMULATED_PERM_SUBSET_ALLSIB 6
#define SIMULATED_PERM_SUPERSET 7
#define SIMULATED_PERM_HYBRID 8
#define SIMULATED_PERM_SUBSETS 9



// Integer codes for a seeding scheme (global / per rule / per permutation by name).
// NOTE(review): presumably the accepted values of the `nseeding_method`
// parameters declared further down — confirm.
#define SEEDING_GLOBAL 0
#define SEEDING_PER_RULE 1
#define SEEDING_PER_PERM 2

// Integer codes 0..5 naming rule-matching criteria.
// NOTE(review): presumably the accepted values of the `nmatch_method`
// parameters of the MatchRules functions declared further down — confirm.
#define MATCH_PVALUE 0
#define MATCH_PVALUE_FP 1
#define MATCH_SET 2
#define MATCH_PROBABILITY 3
#define MATCH_BINARY 4
#define MATCH_EXACT_BINARY 5


// Plain record describing one association rule.
// The struct has no constructor, so members are uninitialized unless the
// creator sets them. Ownership of the two pointer members is not expressed here.
// NOTE(review): the per-field descriptions below are inferred from the field
// names only — confirm against the code that fills this struct.
struct ASSOCRULE
{
	int npreorder;	// presumably a preorder index of the rule/pattern — TODO confirm
	int npat_len;	// presumably the number of ints reachable through `pattern`
	int *pattern;	// pointer to int data (presumably item ids); not owned/freed by this struct itself
	int nsup;	// presumably the support count of the pattern
	int ntgt_sup;	// presumably the support count within the target class
	int *ptid_list;	// pointer to int data (presumably transaction ids); not owned/freed by this struct itself
	double dscore;	// a score value; its definition is not visible here
	double dpvalue;	// presumably the raw p-value of the rule
	double dadjusted_pvalue;	// presumably the p-value after multiple-testing adjustment
	double dcond_pvalue;	// presumably a conditional p-value — TODO confirm
	int nclass_no;	// presumably the index of the class the rule predicts
};

// Plain record of three ints describing one pattern directory entry.
// NOTE(review): field meanings are inferred from names — confirm against users.
struct PAT_DIR_NODE
{
	int nflag;	// flag word; presumably indexed by the *_FLAG_BITPOS constants — TODO confirm
	int ntidlist_pos;	// presumably a position/offset of this pattern's tid list
	int nparent_preorder;	// presumably the preorder index of the parent pattern
};

// Globals defined in another translation unit (declarations only).
// NOTE(review): by name, a test count and two p-value thresholds — confirm.
extern int gnum_of_tests;
extern double gdmax_pvalue;
extern double gdpermFWER_pvalue_thres;

// Plain record grouping five p-value thresholds with seven rule counts.
// NOTE(review): "BC", "BH" and "FWER" presumably stand for Bonferroni
// correction, Benjamini-Hochberg and family-wise error rate, and "perm" for
// permutation-based variants — inferred from names, confirm against users.
struct SIGN_RULE_NUM
{
	// Thresholds.
	double dpvalue_thres;
	double dBC_pvalue_thres;
	double dBH_pvalue_thres;
	double dpermBH_pvalue_thres;
	double dperm_FWER_thres;

	// Counts of significant rules (presumably under the non-permutation thresholds).
	int num_of_sign_rules;
	int num_of_BCsign_rules;
	int num_of_BHsign_rules;

	// Counts of significant rules (presumably under permutation-based p-values).
	int num_of_perm_sign_rules;
	int num_of_perm_BCsign_rules;
	int num_of_perm_BHsign_rules;

	// Count of rules (presumably those passing the permutation FWER threshold).
	int num_of_permFWER_rules;
};

// Plain record of match-quality figures for one set of rules.
// NOTE(review): the formulas behind these values are not visible in this
// header; the descriptions are inferred from the field names.
struct MATCH_METRICS
{
	double drecall;	// presumably recall
	double dprecision;	// presumably precision
	double dF1;	// presumably the F1 score
	int num_of_FPs;	// presumably the number of false positives
	double dFDR;	// presumably the false discovery rate
};

// Bundle of seven MATCH_METRICS records. The member names parallel the seven
// rule counts in SIGN_RULE_NUM (sign / BCsign / BHsign, their perm_ variants,
// and perm FWER).
// NOTE(review): the one-to-one correspondence is inferred from naming — confirm.
struct MATCH_STAT
{
	MATCH_METRICS sign_match_metrics;
	MATCH_METRICS BCsign_match_metrics;
	MATCH_METRICS BHsign_match_metrics;

	MATCH_METRICS perm_sign_match_metrics;
	MATCH_METRICS perm_BCsign_match_metrics;
	MATCH_METRICS perm_BHsign_match_metrics;

	MATCH_METRICS perm_FWER_match_metrics;
};

// Plain record of five summary statistics.
// NOTE(review): presumably computed over a collection of p-values (per the
// struct name) — confirm against the code that fills it.
struct PVALUE_STAT
{
	double davg;	// presumably the mean
	double dstd_dev;	// presumably the standard deviation
	double dmin;	// presumably the minimum
	double dmax;	// presumably the maximum
	double dmedian;	// presumably the median
};



// Globals defined in another translation unit (declarations only).
// NOTE(review): meanings are inferred from names — confirm at the definitions.

// Presumably database size, target-class support, and min/max support bounds.
extern int gndb_size;
extern int gntgt_sup;
extern int gnmin_sup;
extern int gnmax_sup;

// Presumably parameters describing the current dataset and the mining run.
extern int gndataset_instant_num;
extern int gndataset_attr_num;
extern int gndataset_rule_num;
extern int gndataset_rule_sup;
extern double gddataset_rule_conf;
extern int gnmine_min_sup;
extern double gdmine_min_conf;

extern int gnum_of_holdout_exp_tests;

extern int gnum_of_classes;
// Fixed 10-char buffers: any string stored here must fit in 9 characters plus
// the terminating NUL.
extern char gszmine_target_value[10];
extern char gszembed_target_value[10];


// Synthetic-dataset and true-rule helpers (prototypes only).
// NOTE(review): presumably GenSynDatasets writes `num_of_datasets` generated
// datasets under `szoutput_name` — confirm against the definition.
void GenSynDatasets(int num_of_rows, int num_of_pos_rows, int num_of_clmns, int nmin_v, int nmax_v, int num_of_rules, int nmin_len, int nmax_len, int nmin_sup, int nmax_sup, int nchoice, double dmin_es, double dmax_es, int num_of_datasets, char* szoutput_name);
// Alternative GenSynDatasets signature, commented out (no nmin_len; takes
// dpvalue/dpmax/deffect_size instead of dmin_es/dmax_es):
//void GenSynDatasets(int num_of_rows, int num_of_pos_rows, int num_of_clmns, int nmin_v, int nmax_v, int num_of_rules, int nmax_len, int nmin_sup, int nmax_sup, int nchoice, double dpvalue, double dpmax, double deffect_size, int num_of_datasets, char* szoutput_name);
void ConvertRules(char* szdataset_name, int num_of_attrs, int num_of_rules, char* szoutput_name);
// The pointer and length arguments are passed by reference, so the callee can
// set them. Returns an int (presumably the number of rules loaded — TODO
// confirm). Who frees the buffers is not visible here.
int LoadTrueRules(char* szdataset_name, ASSOCRULE *&prules, int *&pitemset_buf, int *&ptidlist_buf, int &nmax_rule_len);


// Rule mining and result-loading routines (prototypes only).
// MineRules has two overloads: the first takes dmax_local_pvalue,
// neffect_size_method and nseeding_method; the second takes dpvalue_buf_size
// and bperm_diff_list instead.
void MineRules(char* szdataset_name, char* sztarget_attr, char* sztarget_value, int nmin_sup, int nmax_len, double dmax_pvalue, double dmax_local_pvalue, int neffect_size_method, double deffect_size, int ncorrection_method, int num_of_repetitions, int nseeding_method, char* szoutput_name);
void MineRules(char* szdataset_name, char* sztarget_attr, char* sztarget_value, int nmin_sup, int nmax_len, double dmax_pvalue, double deffect_size, int ncorrection_method, int num_of_repetitions, double dpvalue_buf_size, bool bperm_diff_list, char* szoutput_name);
void ReadTreeStatis(char* szoutput_name);
double GetRunTime();
// NOTE(review): `ptgt_values` is an untyped void*; the expected pointee type is
// not visible here — confirm at the definition. A three-argument overload
// taking `int *&` is declared later in this header.
void LoadTgtValues(char* szoutput_name, void *ptgt_values);
// NOTE(review): the unqualified `map`, `string` and `vector` on the next two
// lines rely on the file-scope `using namespace std;`.
void LoadTgtValueMap(char* szoutput_name, map<string, int> *ptgtvalue_map);
void LoadAttrNames(char* sznames_filename, vector<string> *pvec_attr_names);
// The pointer and length arguments are passed by reference, so the callee can
// set them. Returns an int (presumably the number of rules loaded — TODO
// confirm).
int LoadMinedRules(char* szoutput_name, ASSOCRULE * &prules, int *& pitemset_buf, int * &ptidlist_buf, int &nmax_rule_len);
void LoadMinedTidList(char *szoutput_name, ASSOCRULE *prules, int num_of_rules);
void LoadSignRuleNums(char* szoutput_name, SIGN_RULE_NUM *psign_rule_nums);
void GetPermPvalues(char* szoutput_name, int num_of_permutations, int nperm_no, char *szoutput_filename);

// Averaging / merging utilities (prototypes only).
// GetAvgPermPvalues has two overloads; the second omits the
// `ncorrection_method` parameter and is otherwise identical in its parameter list.
void GetAvgPermPvalues(char* szdataset_name, char* sztarget_attr, char* sztarget_value, int nmin_sup, int nmax_len, double dmax_pvalue, double dmax_local_pvalue, int neffect_size_method, double deffect_size, int ncorrection_method, int num_of_repetitions, int nseeding_method, int num_of_runs, char* szoutput_name);
void GetAvgPermPvalues(char* szdataset_name, char* sztarget_attr, char* sztarget_value, int nmin_sup, int nmax_len, double dmax_pvalue, double dmax_local_pvalue, int neffect_size_method, double deffect_size, int num_of_repetitions, int nseeding_method, int num_of_runs, char* szoutput_name);
// NOTE(review): presumably merges eight files identified by prefix/suffix — confirm.
void Merge8Files(char *szprefix, char* szsuffix);
// NOTE(review): presumably averages `num_of_avg_clmns` columns grouped by
// column `ngroup_clmn_no` — confirm.
void GetAvg(char* szfilename, int ngroup_clmn_no, int num_of_avg_clmns, char* szoutput_filename);

// Rule matching and match-statistics output (prototypes only).
// MatchRules is overloaded four ways: one in-memory form operating on
// ASSOCRULE arrays and writing into *pmatch_stat, and three forms taking
// file/dataset names.
// NOTE(review): `nmatch_method` presumably takes one of the MATCH_* codes — confirm.
void MatchRules(ASSOCRULE *pmined_rules, int num_of_mined_rules, ASSOCRULE *ptrue_rules, int num_of_true_rules, int nmatch_method, double dmatch_thres, SIGN_RULE_NUM *psign_rule_nums, int* ptgt_values, MATCH_STAT *pmatch_stat);
// Unlike the in-memory MatchRules above, this takes neither num_of_true_rules
// nor dmatch_thres.
void MatchRules_pvalue(ASSOCRULE *pmined_rules, int num_of_mined_rules, ASSOCRULE *ptrue_rules, int nmatch_method, SIGN_RULE_NUM *psign_rule_nums, int* ptgt_values, MATCH_STAT *pmatch_stat);
void MatchRules(char* szrule_output_name, char* szoutput_name, char* szdataset_name, int nmatch_method, double dmatch_thres, char* szoutput_filename);
void MatchRules(char* szrule_output_name, char* szdataset_name, char* szoutput_filename);
void MatchRules(char* szdataset_name, int num_of_rules, char* szoutput_name, double dmatch_thres, char* szsum_prefix);

// OutputMatchStat has two overloads; the second additionally takes
// dmatch_thres and a MATCH_STAT pointer.
void OutputMatchStat(char* szdataset_name, char *szoutput_name, SIGN_RULE_NUM *psign_rule_nums, char *szoutput_filename);
void OutputMatchStat(char* szdataset_name, char *szoutput_name, double dmatch_thres, SIGN_RULE_NUM *psign_rule_nums, MATCH_STAT *pmatch_stat, char *szoutput_filename);
// NOTE(review): presumably fills *pitem_map with attribute-value -> item id
// entries read from the file — confirm.
void LoadAttrValues2ItemMap(char* szattrvalue_filename, std::map<std::string, int> *pitem_map);


// Comparator whose signature matches the callback type taken by qsort/bsearch.
// NOTE(review): presumably compares two ints — confirm ordering at the definition.
int comp_int(const void *e1, const void *e2);
// Set operations on int arrays given as (length, pointer) pairs.
// NOTE(review): presumably the inputs must be sorted, the int return value is
// the size of the result, and `presult_set` must be large enough to hold it —
// none of this is visible here; confirm at the definitions.
int get_intersection(int nlen1, int *pset1, int nlen2, int *pset2, int *presult_set);
int get_intersection_size(int nlen1, int *pset1, int nlen2, int *pset2);
int get_diffset(int nlen1, int *pset1, int nlen2, int *pset2, int *presult_set);
int get_union(int nlen1, int *pset1, int nlen2, int *pset2, int *presult_set);


// Calculate p-values using Fisher's exact test.
// NOTE(review): `gpdfactorials` is presumably a precomputed factorial (or
// log-factorial) table filled by InitFactorials for indices up to n — confirm.
extern double *gpdfactorials;
void InitFactorials(double* pdfactorials, int n);
// NOTE(review): the meaning of (n, x, y, k) is not visible here; presumably
// the population size, two marginal counts and the observed overlap of a 2x2
// contingency table — confirm the argument order at the definition.
double CalcFisherPvalue(int n, int x, int y, int k);
double CalcTwoTailedFisherPvalue(int n, int x, int y, int k);
void GenPvalue(int n, int y, int num_of_classes);

// Holdout evaluation routines (prototypes only).
// NOTE(review): by name, rules are mined on one dataset and validated on a
// second one — confirm against the definitions.
void holdout(char *szmine_dataset, char *szcheck_dataset, char *szoutput_name, char* sztarget_attr, char* sztarget_value, 
			 int nmine_min_sup, int nmine_max_len, double dmine_pvalue, double dmine_local_pvalue, 
			 int neffect_size_method, double dmine_effect_size);
void output_tgtvalues(char *szoutput_name, int *ptgt_values, int num, std::vector<int> *targets);
// NOTE(review): `converted` here is a vector of pointers to vectors, whereas
// holdout_rules/valid below take a vector of raw int pointers plus `ntrans_len`.
void read_convert(char *szcheck_dataset, std::vector<std::vector<int> *> *converted, std::vector<int> *targets, char *szoutput_name);
void holdout_rules(char *szrule_output_name, vector<int*> *converted, int ntrans_len, std::vector<int> *targets, char *szoutput_name, double dvalid_pvalue);
void valid(int nfirst_dataset_size, ASSOCRULE *pmined_rules, int num_of_mined_rules, vector<int*>* converted, int ntrans_len, vector<int> *targets,  std::vector<ASSOCRULE *> *pvalid_rules, int nmin_sup, double dvalid_pvalue);
// `thesign_rule_nums` is passed by non-const reference, so the callee may modify it.
void recalculate(std::vector<ASSOCRULE *> *pvalid_rules, int num_of_mined_rules, double dvalid_pvalue, SIGN_RULE_NUM &thesign_rule_nums);
void output_holdout_rules(std::vector<ASSOCRULE *> *pvalid_rules, char *szoutput_name, SIGN_RULE_NUM &thesign_rule_nums, int num_of_mined_rules, double dmine_pvalue);
// NOTE(review): `copy` shares its name with std::copy, which becomes visible
// through `using namespace std;` if <algorithm> is included; whether this copies
// the pointed-to arrays or only the pointers is not visible here.
void copy(ASSOCRULE *dest, ASSOCRULE *src);
// NOTE(review): returns bool; presumably an ordering predicate over rules — confirm.
bool compare(ASSOCRULE *first, ASSOCRULE *second);
void RandomHoldout(char *szdataset_name, char* sztarget_attr, char* sztarget_value, int nmine_min_sup, int nmine_max_len, double dmine_pvalue, double dmine_effect_size, char *szoutput_name);


// Holdout rule-matching routines (prototypes only).
void holdout_match_rules(char *szmerged_dataset, char *szrule_dataset, int num_of_clmns, int num_of_rules, char *szoutput_name, double dmatch_thres);
// The three functions below overload LoadTgtValues / LoadMinedRules /
// LoadMinedTidList declared earlier in this header, with different parameter lists.
// NOTE(review): the role of the `holdout` string argument is not visible here — confirm.
void LoadTgtValues(char *szoutputname, int *&ptgt_values, char *holdout);
int LoadMinedRules(char *szdataset_name, ASSOCRULE *&pmined_rules, int &nmax_mined_rule_len);
void LoadMinedTidList(char *szdataset_name, ASSOCRULE *pmined_rules, int num_of_mined_rules, char *holdout);

// Dataset partitioning routines (prototypes only).
// `mapping` is passed by non-const reference in both functions.
// NOTE(review): presumably `partition` fills it and `map_tid` consumes it — confirm.
void partition(char *szdataset_name, char *szoutput_name, std::vector<int> &mapping);
void map_tid(char *szdataset_name, char *szoutput_name, std::vector<int> &mapping);
void copy_namefile(char *szdataset_name, char *szoutput_name);
void copy_rulefile(char *szdataset_name, char *szoutput_name);
// Takes `s` by value and `tokens` by reference.
// NOTE(review): presumably appends the pieces of `s` separated by `delim` to
// `tokens` — confirm.
void split(std::string s, std::vector< std::string > &tokens, char delim);

// NOTE(review): no parameters or return value; presumably removes generated
// data files — confirm what it deletes before calling.
void rm_data();

// File splitting / post-processing utilities (prototypes only).
// SplitDatasets has two overloads; the second adds `nrow_sep_num` and an
// output prefix.
void SplitDatasets(char* szfilename, char* szoutput_suffix);
void SplitDatasetNPara(char* szfilename, char* szoutput_suffix);
void SplitDatasets(char* szfilename, int nrow_sep_num, char* szoutput_prefix, char* szoutput_suffix);
// NOTE(review): presumably sorts the p-values in the input file into the
// output file — confirm.
void SortPValue(char* szpvalue_filename, char* szoutput_filename);
// NOTE(review): presumably bins (confidence, p-value) pairs into a matrix
// using the given start/interval parameters — confirm.
void CalcConfPvalueMatrix(char* szpvalue_filename, double dstart_conf, double dconf_interval, double dpvalue_interval, char* szoutput_filename);

// Comparator whose signature matches the callback type taken by qsort/bsearch.
// NOTE(review): presumably compares two doubles — confirm ordering at the definition.
int comp_double(const void *e1, const void *e2);

