#pragma once 

#include <stdio.h>
#include <math.h>

#include "cfptree_outbuf.h"
#include "gen_tidlist.h"


// Size constant 1<<26 (= 67,108,864); judging by its name it sizes the buffer used while traversing the tree.
// NOTE(review): the unit (bytes vs. entries) is not visible in this header - confirm at the use site.
#define TREE_TRAVERSE_BUF_SIZE   (1<<26)


// Summary statistics of one pattern. Filled by the CopyPatInfo() overloads defined at the end of this header.
struct PAT_INFO
{
	int npreorder;    // id passed in by the caller of CopyPatInfo(); presumably the pattern's preorder number in the tree - confirm
	int nsupport;     // support count; used as the divisor when dmean is computed
	char* ptgt_stat;  // raw target-statistics buffer; CopyPatInfo() memcpy's gntgt_stat_size bytes INTO it, so it must already point to storage of at least that size
	double dmean;     // continuous target: TGT_SUM::dsum/nsupport; otherwise, if gsztarget_value is non-empty: ((int*)ptgt_stat)[0]/nsupport
	double dstd_dev;  // only written for continuous targets: sqrt((dsquare_sum - dsum*dmean)/(nsupport-1))
};

// A PAT_INFO-like record that additionally carries the item it belongs to and a tid list.
// Filled by the CopyDiffItem() overloads defined at the end of this header.
struct DIFF_ITEM
{
	int nitem;        // item id; CopyDiffItem(ENTRY*, ...) takes it from ENTRY::item
	int npreorder;    // taken from ENTRY::npreorder or PAT_INFO::npreorder
	int nsupport;     // support count; divisor for dmean
	char *ptgt_stat;  // raw target-statistics buffer; CopyDiffItem() memcpy's gntgt_stat_size bytes INTO it, so it must be pre-allocated
	double dmean;     // computed the same way as PAT_INFO::dmean
	double dstd_dev;  // computed the same way as PAT_INFO::dstd_dev (continuous targets only)
	int *ptid_list;   // not touched by any code in this header; presumably the transaction-id list of the item - confirm
};

// One level of the minimum-p-value bookkeeping (CTREE_EHTA keeps a pointer to these in mpmin_pvalue_matrix_stack).
struct PVALUE_MATRIX
{
	// The two pointers share storage: only one of the two views is valid at a time.
	// NOTE(review): which view is active in which mode is decided outside this header - confirm.
	union
	{
		double **ppattr_minpvalue_matrixes;  // view 1: array of pointers to double arrays
		double *pattr_minpvalues;            // view 2: flat array of doubles
	};
	double *pattr_minpvalue_buf;  // separate buffer pointer; presumably the backing store for the matrices - confirm
};

// Contribution record for one attribute or one item (see the globals gpattr_contributions and gpitem_contributions).
struct CONTRIBUTION
{
	int nid;                // id of the attribute or item this record describes (presumably - confirm)
	int ndiff_change_rank;  // rank by ddiff_change (presumably assigned after sorting with comp_diffchange - confirm)
	int ncontr_rank;        // rank by dcontribution (presumably assigned after sorting with comp_contr - confirm)
	int num_of_values;      // only meaningful for attribute records; unused for item records
	double ddiff_change;
	double dcontribution;
};

// One hypothesis: a context pattern plus the items that are compared within that context.
// NOTE(review): field meanings below are read off the names and types only - confirm against the loader (LoadOneHypothesis).
struct HYPOTHESIS
{
	int norig_hid;            // presumably the hypothesis id from the original output file
	int ncontext_len;         // presumably the number of entries in pcontext
	int *pcontext;            // context items
	PAT_INFO *ppat_info;      // statistics of the context pattern
	int num_of_diff_items;    // presumably the number of entries in pdiff_items
	DIFF_ITEM *pdiff_items;   // items compared under the context
	double dscore;
	double dpvalue;
};


// One association rule.
// get_missing_item() in this header reads pattern[0 .. npat_len-1], so pattern holds npat_len items.
// NOTE(review): the remaining field meanings are read off the names only - confirm against LoadOneRule/OutputOneRule.
struct ASSOCRULE
{
	int npreorder;
	int npat_len;             // number of items in pattern
	int *pattern;             // the rule's item ids
	PAT_INFO *ppat_info;      // statistics of the rule's pattern
	int ntgt_class;           // presumably the target class the rule predicts
	int ntidlist_len;         // presumably the number of entries in ptid_list
	int *ptid_list;
	double dscore;
	double dpvalue;
	double dadjusted_pvalue;
	double dcond_pvalue;
};



// Bit flags stored in the per-item bitmap bytes during exploration.
// EXPLORE_FOUND_IN_PREFIX is set and cleared temporarily by get_missing_item() in this header;
// EXPLORE_IN_PAT is not used in this header (presumably marks items of the explored pattern - confirm).
#define EXPLORE_IN_PAT 1
#define EXPLORE_FOUND_IN_PREFIX 2


// CTREE_EHTA bundles the state and routines for hypothesis generation/exploration,
// association-rule generation/exploration, representative-rule post-processing and
// a verification traversal (see the section comments below). It works on a tree read
// through mfpcfp_file and on the tid-list files mfp_tidlist / mfp_tidlist_dir.
// A single global instance, gocfptree_ehta, is declared right after the class.
//
// Everything declared before the first access specifier is private (class default);
// the public section holds the entry points; a trailing private section holds the
// helpers added for the api_* (GUI) entry points.
class CTREE_EHTA
{
	//--- tree file and traversal counters
	FILE *mfpcfp_file;
	int mnpreorder;
	int mndepth;
	int mnum_of_closed_pats;

	//--- traversal stack and paged in-memory buffer for the tree
	// NOTE(review): exact paging semantics live in LoadPages(); not visible in this header.
	ACTIVE_NODE *mpactive_stack;
	int mnactive_top, mnmin_inmem_level;
	char **mptree_buffer;
	int mnbuf_size, mnbuf_num_of_pages, mninbuf_start_pos, mninbuf_end_pos;
	char *mpbuffer_page;
	int mncur_disk_pos;

	//--- current pattern and its statistics
	int *mppattern, mnpat_len;
	PAT_INFO mopat_info;
	DIFF_ITEM *mpdiff_items;
	char mszpath[200];  // fixed-size path buffer (200 bytes including the terminator)

	//--- per-item / per-attribute flag bytes (see the EXPLORE_* flags)
	char *mpitem_bitmap, *mpattr_bitmap;

	ENTRY *mpentry_buf;
	int mndfs_entry_buf_pos;
	char *mptgt_stat_buf;

	//--- tid-list files and their directory
	FILE *mfp_tidlist;
	FILE *mfp_tidlist_dir;
	PAT_DIR_NODE *mptidlist_dir_nodes;
	int mnmax_tidlist_len;

	PVALUE_MATRIX *mpmin_pvalue_matrix_stack;
	
	//=== routines for hypothesis generation
	int LoadPages(int nstart_pos_boundary, int ndisk_pos);
	void gen_context(int ndisk_pos, int nparent_sup, int nparent_boundary);
	void GenContextHypotheses(int nstart_pos_boundary, int ncontext_sup);
	bool search_pat_ondisk(int ndisk_pos, int npat_len, DIFF_ITEM *pdiff_item);
	bool search_pat_inmem(int nstart_pos_boundary, int ndisk_pos, int npat_len, DIFF_ITEM *pdiff_item);
	void test_hypotheses(int ncontext_sup, int num_of_diff_items);
	//===

	//--- routines for checking the redundancy of hypotheses
	// IsMinPvalue is overloaded: one form takes a pair of diff items, the other a single attribute number.
	void InitPvalueBuf();
	void ResetPvalueBuf();
	bool IsMinPvalue(int ndiff_item1, int ndiff_item2, double dpvalue);
	bool IsMinPvalue(int nattr_no, double dpvalue);
	void FreePvalueBuf();
	//---

	//====== variables and routines for hypothesis analysis & exploration
	FILE *mfptext, *mfpattrH;
	PAT_INFO **mppreasoning_items, *mpreasoning_item_buf, *mpcontext_rs_items;
	bool mbis_uncovered_counted;
	char *mphyps_diff_item_tgt_stat_buf, *mpmdiff_item_tgt_stat_buf, *mpcntxt_item_tgt_stat_buf, *mprs_item_tgt_stat_buf;
	HYPOTHESIS monehypothesis;


	//--- newer hypothesis-exploration routines (remove / add / replace one attribute, filtering)
	char *mpfilter_attr_bitmap, *mpfilter_item_bitmap;
	void HYP_RemoveOneAttr(char* szinput_name, int nhno, HYPOTHESIS *phypothesis, char* szattr_name, FILE *fpout);
	void HYP_AddOneAttr(char* szinput_name, int nhno, HYPOTHESIS *phypothesis, char* szattr_name, FILE *fpout);
	void HYP_ReplaceOneAttr(char* szinput_name, int nhno, HYPOTHESIS *phypothesis, char* szattr_name, FILE *fpout);
	void HYP_FilterByAttrsNItems(char* szinput_name, char* szattr_file, char* szitem_file, char* szcomp_attr_file, char* szcomp_item_file);
	void search_context(int ndisk_pos, int nparent_boundary, int num_of_items);
	void search_context_immd_supersets(int nstart_pos_boundary);
	void gen_hypotheses(int num_of_diff_items);
	//====== 



	//=== old routines for hypothesis exploration that are no longer in use
	void PullDown(int nhno, HYPOTHESIS *phypothesis);
	void RollUp(int nhno, HYPOTHESIS *phypothesis);
	void search_immediate_subsets(int ndisk_pos, int npat_len, int nlost_item, ENTRY *pparent_entry, char* ptgt_stat, PAT_INFO *preasoning_items);
	void ContextReplace1Item(int nhno, HYPOTHESIS *phypothesis);
	void context_replace_one_item(int ndisk_pos, int npat_len, int nreplace_item, int nmatch_item, PAT_INFO *preasoning_items);
	void ChangeCompItems(int nhno, HYPOTHESIS *phypothesis);
	void replace_comp_item(int ndisk_pos, int npat_len, int nitem_to_be_replaced, int nreplace_item, PAT_INFO *preasoning_items);
	void ChangeCompAttr(int nhno, HYPOTHESIS *phypothesis);
	void AnalyzeOneHypothesis(HYPOTHESIS *phypothesis);
	void search_immediate_supersets(int ndisk_pos, int npat_len, int nextra_item, PAT_INFO *preasoning_items);


	//=============== variables and routines for association rule generation =================
	FILE *mfppvalue;
	char *mpglobal_tgt_stat_diff, *mpXtgt_stat_diff;
	ASSOCRULE morule;
	int *mpintersection_set;
	int *mpshared_items, mnum_of_shared_items;
	int *mptid_list;

	void gen_rules(int ndisk_pos, int nparent_entry_no, int nparent_sup, bool bparent_is_singleton, double dmax_pvalue);
	void test_onerule(int nentry_no, bool bis_singleton_rule, double dmax_pvalue);


	//--- newer routines for association rule exploration
	// Each AR_*OneAttr entry is followed by the search / tidlist / output helpers that share its parameter set.
	int mnexplore_attr_no;
	ASSOCRULE *mpexplore_rule;

	// remove one attribute -> immediate subsets
	void AR_RemoveOneAttr(char* szinput_name, ASSOCRULE *prule, char* szattr_name, PAT_INFO *ppat_info_array, FILE *fpout);
	void search_immd_subsets(int ndisk_pos, int num_of_items, int nmissing_item, PAT_INFO *ppat_info_array);
	bool search_exact(int ndisk_pos, int num_of_items, PAT_INFO *ppat_info);
	void tidlist_immd_subsets(char* szinput_name, ASSOCRULE *prule, PAT_INFO *ppat_info_array, FILE *fpout);
	void output_immd_subsets(FILE *fpout, ASSOCRULE *prule, int nremoved_item, PAT_INFO *ppat_info_array);
	void output_one_freq_rule(FILE *fpout, ASSOCRULE *prule, PAT_INFO *ppat_info);

	// add one attribute -> immediate supersets
	void AR_AddOneAttr(char* szinput_name, ASSOCRULE *prule, char* szattr_name, PAT_INFO *ppat_info_array, FILE *fpout);
	void search_immd_supersets(int ndisk_pos, int num_of_items, int nextra_item, PAT_INFO *ppat_info_array);
	void tidlist_immd_supersets(char* szinput_name, ASSOCRULE *prule, int nextra_attr_no, PAT_INFO *ppat_info_array, FILE *fpout);
	void output_immd_supersets(FILE *fpout, ASSOCRULE *prule, int nextra_attr_no, PAT_INFO *ppat_info_array);
	
	// replace one attribute -> siblings
	void AR_ReplaceOneAttr(char* szinput_name, ASSOCRULE *prule, char* szattr_name, PAT_INFO *ppat_info_array, FILE *fpout);
	void search_siblings(int ndisk_pos, int num_of_items, PAT_INFO *ppat_info_array);
	void tidlist_siblings(char* szinput_name, ASSOCRULE *prule, int nexplore_attr_no, PAT_INFO *ppat_info_array, FILE *fpout);
	void output_siblings(FILE *fpout, ASSOCRULE *prule, int nexplore_attr_no, PAT_INFO *ppat_info_array);
	
	// filter rules by attribute / item files
	void AR_FilterByAttrsNItems(char* szattr_file, char* szitem_file);
	void search_ar(int ndisk_pos, int num_of_items);
	void output_rules(FILE *fp, int npat_len, int num_of_shared_items, ENTRY *pentry, char* ptgt_stat);
	//---


	//--- old routines for rule exploration that are no longer in use
	void SearchParents(int nrule_no, ASSOCRULE *prule);	
	void SearchChildren(int nrule_no, ASSOCRULE *prule);
	void SearchSiblings(int nrule_no, ASSOCRULE *prule);
	void CompareWithSiblings(int nrule_no, ASSOCRULE *prule);
	void compare_with_siblings(int ndisk_pos, int num_of_items, ASSOCRULE *prule);
	void output_hypotheses(ASSOCRULE *prule, int ndiff_item, int num_of_diff_items);
	//---

	//====== routines for generating representative rules in a post-processing step
	// This private GenRepresentativeRules overload returns an int and hands back an array through prep_rules
	// (pointer passed by reference); the public overload below takes only (szoutput_name, nrep_method).
	int GenRepresentativeRules(char* szrule_output_name, char* szoutput_name, int nrep_method, ASSOCRULE *&prep_rules);
	bool IsRepRule(ASSOCRULE *prule, ASSOCRULE *prep_rules, int num_of_rep_sign_rules);
	bool IsRepRule(ASSOCRULE *prule, ASSOCRULE *prep_rules, int num_of_rep_sign_rules, char* ptgt_values);
	bool IsRepRule(ASSOCRULE *prule, int *prep_tid_list, int nrep_tidlist_len, char *prep_tgt_stat, char* ptgt_values);
	//======

	// recursive worker behind the public Traverse() (presumably - confirm in the .cpp)
	int traverse(int ndisk_pos, int nparent_sup, bool bparent_closed);

public:

	// hypothesis generation & exploration
	void GenHypotheses(char* szoutput_name);
	void AnalyzeHypotheses(char *szinput_name, char* szhypothesis_filename, char* szoutput_name);
	void FilterHypotheses(char* szinput_filename, char* szoutput_filename);


	// set-up / tear-down around queries (presumably paired - confirm)
	void QueryInitialization(char *szoutput_name);
	void QueryTermination();


	// association rule generation & exploration
	void GenAssocRules(char* szoutput_name);
	void ExploreRules(char* szinput_name, char *szrule_filename, char* szoutput_name);
	void FilterRules(char* szinput_name, char* szoutput_filename);


	// generating representative association rules in a post-processing step
	void GenRepresentativeRules(char* szoutput_name, int nrep_method);
	void GenRepresentativeRulesAdj(char* szoutput_name, int nrep_method);


	// verification
	void Traverse(char* szoutput_name);

	// added --------------
	// extended functions, for GUI operations
	void api_ar_single(char *szoutput_filename, int argc, char **szrule, int operation);
	void api_ar_search(char *szoutput_filename, int argc, char **szrule, int operation);

	void api_hyp_single(char *szoutput_filename, int argc, char **szhypothesis, int operation);
	void api_hyp_search(char *szoutput_filename, int argc, char **szhypothesis, int operation);

	// newer overloads that additionally take an attribute name
	void api_ar_single(char *szoutput_filename, int argc, char *szattr_name, char **szrule, int operation);
	void api_hyp_single(char *szoutput_filename, int argc, char *szattr_name, char **szhypothesis, int operation);

	// newer filter entry points
	void ar_filter(char *szoutput_filename, char *szattr_file, char *szitem_file);
	void hyp_filter(char *szoutput_filename, char *szattr_file, char *szitem_file, char *szcomp_attr_file, char *szcomp_item_file);

private:
	// helpers for the api_* entry points above
	void api_output_contribution(HYPOTHESIS *phypothesis, char *szoutput_filename, int operation);


	// association-rule matching: exact match and subset search
	void AR_Exact_Match(int nrule_no, ASSOCRULE *prule);
	void AR_exact_match(int ndisk_pos, int num_of_items, PAT_INFO *ppat_info);
	void AR_Subsets(int nrule_no, ASSOCRULE *prule);
	void AR_subsets(int ndisk_pos, int num_of_items);
	void AR_output_subset_rules(FILE *fp, ENTRY *pentry, int num_of_items);
	void output_one_rule(FILE *fp, int npat_len, ENTRY *pentry);

	void AR_recursive(int *items, int len, int start_index, int *combination, int k, int max); 
	
	// hypothesis matching: exact match and subset search
	void HYP_Exact_Match(int nhno, HYPOTHESIS *phypothesis);
	void HYP_Subsets(int nhno, HYPOTHESIS *phypothesis);
	void HYP_subsets(int ndisk_pos, int num_of_items);
	void HYP_output_subsets(FILE *fp, ENTRY *pentry, int num_of_items);
	void HYP_recursive(int *items, int len, int start_index, int *combination, int k, int max, HYPOTHESIS *phypothesis); 

	// state for the hypothesis subset match: two item arrays and their lengths
	int *contained, *optional;

	int num_of_contained, num_of_optional;
};

// The single global CTREE_EHTA instance (defined in a .cpp file).
extern CTREE_EHTA gocfptree_ehta;

// Global counters, defined elsewhere.
// NOTE(review): judging by the names these count contexts, tests and significant hypotheses
// ("BC"/"BH" presumably = Bonferroni / Benjamini-Hochberg correction) - confirm where they are updated.
extern int gnum_of_contexts;
extern int gnum_of_tests;
extern int gnum_of_sign_hypotheses;
extern int gnum_of_total_sign_hypotheses;
extern int gntotal_context_len;
extern int gntotal_diff_item_num;
extern int gnum_of_BCsign_hypotheses;
extern int gnum_of_BHsign_hypotheses;

extern int gnum_of_output_rules;

// Global p-value thresholds, defined elsewhere.
extern double gdBC_pvalue_thres;
extern double gdpermFWER_pvalue_thres;

// Thresholds and rule counts kept together; filled through the SIGN_RULE_NUM* parameter of
// GetRuleNumNThres() / GetRuleNumSupAdj() and written out by OutputSignRuleNums() (all declared below).
// NOTE(review): "BC"/"BH"/"perm"/"FWER" presumably stand for Bonferroni correction, Benjamini-Hochberg,
// permutation-based and family-wise error rate - confirm.
struct SIGN_RULE_NUM
{
	// p-value thresholds
	double dBH_pvalue_thres;
	double dpermBH_pvalue_thres; 

	// counts of significant rules
	int num_of_sign_rules;
	int num_of_BCsign_rules;
	int num_of_BHsign_rules;

	// counts of significant rules, "perm" variants
	int num_of_perm_sign_rules;
	int num_of_perm_BCsign_rules;
	int num_of_perm_BHsign_rules;

	int num_of_permFWER_rules;
};

// Global SIGN_RULE_NUM records, defined elsewhere: one for all rules ...
extern SIGN_RULE_NUM gosign_rule_nums;

// ... three for representative rules (the names suggest item-based, tid-list-based and collective variants - confirm) ...
extern SIGN_RULE_NUM gorep_item_sign_rule_nums;
extern SIGN_RULE_NUM gorep_tidlist_sign_rule_nums;
extern SIGN_RULE_NUM gorep_collective_sign_rule_nums;


// ... and two for the support-adjusted variants.
extern SIGN_RULE_NUM gosupadj_sign_rule_nums;
extern SIGN_RULE_NUM gosupadj_rep_sign_rule_nums;


extern int gntotal_diff_item_pairs;
extern bool *gphas_paradox_flags;


// Contribution arrays for attributes and items, each with its element count next to it
// (presumably gnum_of_analyze_attrs / gnum_of_analyze_items are the array lengths - confirm).
extern CONTRIBUTION *gpattr_contributions;
extern int gnum_of_analyze_attrs;
extern CONTRIBUTION *gpitem_contributions;
extern int gnum_of_analyze_items;

extern int *gpsup_testnums;
extern int *gptemp_tgt_sup_array;

// Presumably derives the PAT_INFO statistics from a tid list of ntidlist_len entries; defined elsewhere - confirm.
void get_pat_info(int *ptidlist, int ntidlist_len, PAT_INFO *ppat_info);


//====== routines for outputting hypotheses ======
// OutputOneHypothesisPlainText has three overloads that differ only in their trailing parameters:
//   (..., nindex1, nindex2, dscore, dpvalue), (..., num_of_groups) and (..., num_of_groups, dscore, dpvalue).
void OutputOneHypothesisPlainText(FILE *fp, int *ppattern, int npat_len, PAT_INFO *ppat_info, DIFF_ITEM *pdiff_items, int num_of_diff_items, int nindex1, int nindex2, double dscore, double dpvalue);
void OutputOneHypothesisPlainText(FILE *fp, int *ppattern, int npat_len, PAT_INFO *ppat_info, DIFF_ITEM *pdiff_items, int num_of_diff_items, int num_of_groups);
void OutputOneHypothesisPlainText(FILE *fp, int *ppattern, int npat_len, PAT_INFO *ppat_info, DIFF_ITEM *pdiff_items, int num_of_diff_items, int num_of_groups, double dscore, double dpvalue);
void OutputOneHypothesis(FILE *fp, int nhno, int *ppattern, int npat_len, PAT_INFO *ppat_info, DIFF_ITEM *pdiff_items, int num_of_diff_items, int num_of_groups, double dscore, double dpvalue);
void OutputOneAttrHText(FILE *fp, int *ppattern, int npat_len, PAT_INFO *ppat_info, DIFF_ITEM *pdiff_items, int num_of_diff_items);
void OutputHSummary(char* szoutput_name);
void OutputAnalyzeInfo(FILE *fp, int nitem1, PAT_INFO *preasoning_items1, int nitem2, PAT_INFO *preasoning_items2);
void OutputOneAnalyzeText(char* szoutputname, int nhno, HYPOTHESIS *phypothesis, int nitem1, PAT_INFO *preasoning_items1, int nitem2, PAT_INFO *preasoning_items2);
// Comparators with the (const void*, const void*) -> int signature that qsort() expects;
// the element type they compare is not visible here (presumably CONTRIBUTION - confirm).
int comp_diffchange(const void *e1, const void *e2);
int comp_contr(const void *e1, const void *e2);
void OutputAttrNames(char* szoutput_name);
//======

void ConvertHypotheses(char* szhypotheses_filename, char* szoutput_filename);

//------ routines for reading hypotheses ------
// LoadOneHypothesis has two overloads: one is told whether a p-value is present (bhas_pvalue),
// the other returns the hypothesis number through the reference parameter nhno.
void LoadOneHypothesis(FILE *fp, HYPOTHESIS *phypothesis, bool bhas_pvalue);
void LoadOneHypothesis(FILE *fp, int &nhno, HYPOTHESIS *phypothesis);
void LoadHSummary(char* szoutput_name);
void LoadAnalysisInfo(FILE *fp, int &nitem1, PAT_INFO *preasoning_items1, int &nitem2, PAT_INFO *preasoning_items2);
void SortHypotheses(char* szoutput_name);
//------


//====== routines for outputting rules ======
void OutputOneRule(FILE *fp, ASSOCRULE *prule);
void OutputOnePvalue(FILE *fp, int npreorder, int nsup, int ntgt_sup, double dpvalue);
void OutputRuleSummary(char* szoutput_name);
//======
void ConvertRules(char* szrule_filename, char* szattrvalue_filename, char* szoutput_filename);
//------ routines for loading rules ------
void LoadOneRule(FILE *fp, ASSOCRULE *prule);
void LoadRuleSummary(char* szoutput_name);
//------

//====== routines for sorting rules and for finding the cut-off p-value threshold
// LoadRules receives caller-provided storage: prules for num_of_rules rules and ppat_buf
// (presumably the shared buffer the rules' pattern pointers point into - confirm).
void LoadRules(char* szrule_filename, int num_of_rules, ASSOCRULE *prules, int *ppat_buf);
// Comparator with the (const void*, const void*) -> int signature that qsort() expects.
int comp_rule(const void *e1, const void *e2);
void SortRules(char* szoutput_name);
// The two GetRuleNum* routines report their results through the SIGN_RULE_NUM pointed to by prule_nums.
void GetRuleNumNThres(ASSOCRULE *prules, int num_of_rules, SIGN_RULE_NUM *prule_nums);
void OutputSignRuleNums(char* szoutput_name, SIGN_RULE_NUM *psign_rule_nums);
void SortRulesSupAdjusted(char* szoutput_name);
void GetRuleNumSupAdj(ASSOCRULE *prules, int num_of_rules, SIGN_RULE_NUM *prule_nums);

//======

// Presumably writes the difference of two target-statistics buffers into presult_tgt_stat - confirm in the definition.
void GetTgtStatDiff(char* ptgt_stat1, char* ptgt_stat2, char* presult_tgt_stat);

// Set helpers on int arrays given as (length, pointer) pairs; both return an int
// (presumably the length of the intersection written to presult_set - confirm).
// get_intersectNdiffset additionally fills pdiff_set and reports its length through ndiff_len.
int get_intersection(int nlen1, int *pset1, int nlen2, int *pset2, int *presult_set);
int get_intersectNdiffset(int nlen1, int *pset1, int nlen2, int *pset2, int *presult_set, int* pdiff_set, int &ndiff_len);



//===============
// added; defined in cfptree_ehta_extend.cpp
void OutputOneHypothesis(FILE *fp, HYPOTHESIS *phypothesis, bool bhas_pvalue);

// Comparators with the (const void*, const void*) -> int signature that qsort() expects.
int comp_tidlist_nodes_status(const void *e1, const void* e2);
int comp_tidlist_nodes_item(const void *e1, const void* e2);


inline void CopyPatInfo(int npreorder, int nsupport, char *ptgt_stat, PAT_INFO *ppat_info)
{
	ppat_info->npreorder = npreorder;
	ppat_info->nsupport = nsupport;
	memcpy(ppat_info->ptgt_stat, ptgt_stat, gntgt_stat_size);

	if(gntgt_attr_type==CONTINUOUS || gntgt_attr_type==CONTINUOUS_NORMAL)
	{
		TGT_SUM *ptgt_sum;

		ptgt_sum = (TGT_SUM*)ptgt_stat;
		ppat_info->dmean = ptgt_sum->dsum/ppat_info->nsupport;
		ppat_info->dstd_dev = sqrt((ptgt_sum->dsquare_sum-ptgt_sum->dsum*ppat_info->dmean)/(ppat_info->nsupport-1));
	}
	else if(gsztarget_value[0]!=0)
		ppat_info->dmean = (double)(((int*)ppat_info->ptgt_stat)[0])/ppat_info->nsupport;
}

// Copies the statistics of one PAT_INFO into another: nsupport, dmean, dstd_dev and
// gntgt_stat_size bytes of the target-statistics buffer (the destination buffer must
// already be allocated). npreorder is NOT copied.
inline void CopyPatInfo(PAT_INFO *psrc_pat_info, PAT_INFO *ptgt_pat_info)
{
	// scalar statistics
	ptgt_pat_info->dstd_dev = psrc_pat_info->dstd_dev;
	ptgt_pat_info->dmean = psrc_pat_info->dmean;
	ptgt_pat_info->nsupport = psrc_pat_info->nsupport;

	// raw target statistics
	memcpy(ptgt_pat_info->ptgt_stat, psrc_pat_info->ptgt_stat, gntgt_stat_size);
}

// Copies the statistics of a DIFF_ITEM into a PAT_INFO: nsupport, dmean, dstd_dev and
// gntgt_stat_size bytes of the target-statistics buffer (the destination buffer must
// already be allocated). npreorder is NOT copied.
inline void CopyPatInfo(DIFF_ITEM *pdiff_item, PAT_INFO *ppat_info)
{
	// scalar statistics
	ppat_info->dstd_dev = pdiff_item->dstd_dev;
	ppat_info->dmean = pdiff_item->dmean;
	ppat_info->nsupport = pdiff_item->nsupport;

	// raw target statistics
	memcpy(ppat_info->ptgt_stat, pdiff_item->ptgt_stat, gntgt_stat_size);
}

// Copies a PAT_INFO into a DIFF_ITEM: npreorder, nsupport, dmean, dstd_dev and
// gntgt_stat_size bytes of the target-statistics buffer (the destination buffer must
// already be allocated). nitem and ptid_list of the DIFF_ITEM are left untouched.
inline void CopyDiffItem(PAT_INFO *ppat_info, DIFF_ITEM *pdiff_item)
{
	// scalar statistics
	pdiff_item->dstd_dev = ppat_info->dstd_dev;
	pdiff_item->dmean = ppat_info->dmean;
	pdiff_item->nsupport = ppat_info->nsupport;
	pdiff_item->npreorder = ppat_info->npreorder;

	// raw target statistics
	memcpy(pdiff_item->ptgt_stat, ppat_info->ptgt_stat, gntgt_stat_size);
}

// Fills *pdiff_item from a tree ENTRY and its target-statistics buffer.
//   pentry    : supplies npreorder, item (-> nitem) and support (-> nsupport).
//   ptgt_stat : source buffer; gntgt_stat_size bytes are copied into pdiff_item->ptgt_stat,
//               which must already point to storage of at least that size.
// Derived fields:
//   - continuous targets (CONTINUOUS / CONTINUOUS_NORMAL): ptgt_stat is read as a TGT_SUM and
//     dmean / dstd_dev (n-1 denominator) are computed from its dsum and dsquare_sum;
//   - otherwise, when gsztarget_value is a non-empty string: dmean = first int of ptgt_stat
//     divided by the support; dstd_dev is left untouched;
//   - otherwise neither dmean nor dstd_dev is written.
// ptid_list is never touched here.
// NOTE(review): support==1 makes the dstd_dev divisor zero and support==0 makes the dmean divisor zero.
inline void CopyDiffItem(ENTRY *pentry, char* ptgt_stat, DIFF_ITEM *pdiff_item)
{
	const bool bcontinuous_target = (gntgt_attr_type==CONTINUOUS || gntgt_attr_type==CONTINUOUS_NORMAL);

	pdiff_item->nsupport = pentry->support;
	pdiff_item->nitem = pentry->item;
	pdiff_item->npreorder = pentry->npreorder;
	memcpy(pdiff_item->ptgt_stat, ptgt_stat, gntgt_stat_size);

	if(bcontinuous_target)
	{
		// the caller's buffer holds running sums for a continuous target
		TGT_SUM *psums = (TGT_SUM*)ptgt_stat;

		pdiff_item->dmean = psums->dsum/pdiff_item->nsupport;
		pdiff_item->dstd_dev = sqrt((psums->dsquare_sum-psums->dsum*pdiff_item->dmean)/(pdiff_item->nsupport-1));
		return;
	}

	// no target value configured: nothing to derive
	if(gsztarget_value[0]==0)
		return;

	pdiff_item->dmean = (double)(((int*)ptgt_stat)[0])/pdiff_item->nsupport;
}



// Returns the item of prule->pattern that does not occur in ppattern.
//   prule        : rule whose pattern (prule->npat_len items) is the superset.
//   ppattern     : candidate subset, npat_len items; must be exactly one item shorter than the rule.
//   pitem_bitmap : per-item flag bytes indexed by item id; used as scratch space - the
//                  EXPLORE_FOUND_IN_PREFIX bit of every ppattern item is set during the scan
//                  and cleared again before returning. Other bits are preserved.
// Returns the first rule item whose EXPLORE_FOUND_IN_PREFIX bit is not set, or -1 when
//   - the length precondition fails (an error is logged), or
//   - every rule item is marked (e.g. duplicate items in the rule or a stale flag in the bitmap).
inline int get_missing_item(ASSOCRULE *prule, int *ppattern, int npat_len, char* pitem_bitmap)
{
	int i;
	int nitem = -1;  // well-defined result even when the scan below finds nothing

	if(npat_len!=prule->npat_len-1)
	{
		LogErrMsg("", "get_missing_item", "Error: more than one item is missing");
		return nitem;
	}

	// mark every item of the shorter pattern
	for(i=0;i<npat_len;i++)
		pitem_bitmap[ppattern[i]] |= EXPLORE_FOUND_IN_PREFIX;

	// the first unmarked rule item is the missing one
	for(i=0;i<prule->npat_len;i++)
	{
		if((pitem_bitmap[prule->pattern[i]] & EXPLORE_FOUND_IN_PREFIX)==0)
		{
			nitem = prule->pattern[i];
			break;
		}
	}

	// clear the marks with a mask rather than a subtraction: masking is idempotent, so a
	// duplicate item in ppattern cannot borrow into the other flag bits of the byte
	for(i=0;i<npat_len;i++)
		pitem_bitmap[ppattern[i]] &= ~EXPLORE_FOUND_IN_PREFIX;

	return nitem;
}

// Replaces parray with a freshly allocated, larger int array and returns the new capacity.
//   parray    : in/out; the old array is released with DelIntArray(parray, ncapacity) and the
//               pointer is overwritten with the result of NewIntArray(new capacity).
//   ncapacity : current capacity of parray.
//   nsize     : minimum capacity required.
// The new capacity is max(2*ncapacity, nsize).
// NOTE: the old contents are NOT carried over - the old array is released before the new one is allocated.
inline int ResizeArray(int* &parray, int ncapacity, int nsize)
{
	int nnew_capacity;

	DelIntArray(parray, ncapacity);

	// grow geometrically, but never below the requested size
	nnew_capacity = 2*ncapacity;
	nnew_capacity = (nnew_capacity<nsize) ? nsize : nnew_capacity;

	parray = NewIntArray(nnew_capacity);
	return nnew_capacity;
}

