#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <sys/timeb.h>
#include <math.h>

#include <string>
#include <vector>
#include <map>
using namespace std;

#include "global.h"

// Values read by ReadTreeStatis() from the "<output>.cfptree.stat" file.
// gndb_size is also used below as the number of transaction ids (tids run
// from 0 to gndb_size-1) and as the element count of the target-value file.
int gndb_size;
// Last value of the statistics file; presumably the number of positive
// (target-class) records in the dataset -- TODO confirm against the miner.
int gntgt_sup;
// Second and third values of the statistics file.
int gnmin_sup;
int gnmax_sup;

// Values read by LoadSignRuleNums() from the "<output>.rulenum.txt" file:
// the number of tests, the p-value threshold, and the permutation-based
// FWER p-value threshold.
int gnum_of_tests;
double gdmax_pvalue;
double gdpermFWER_pvalue_thres;

// Not referenced in this part of the file; presumably the mining thresholds
// (minimum support / minimum confidence) used elsewhere -- TODO confirm.
int gnmine_min_sup;
double gdmine_min_conf;


// Writes the parameter file "ehta.para" and then runs the external miner with
// the command ".\ARminer ehta.para" through system().
//
// szdataset_name      base name; "<name>.data" and "<name>.names" are written
//                     as data_file / names_file
// sztarget_attr/value written as target_attribute / target_value
// nmin_sup, nmax_len  written as min_sup / max_len
// dmax_pvalue, dmax_local_pvalue
//                     written as max_pvalue / max_local_pvalue
// neffect_size_method 0 selects "confidence", any other value "odds_ratio"
// deffect_size        written as min_diff
// ncorrection_method  negative: no correction settings are written; otherwise
//                     one of the correction-method constants, followed by
//                     num_of_repetitions (an unknown non-negative value writes
//                     num_of_repetitions only, as before)
// nseeding_method     negative or unknown: no seeding_method line is written
// szoutput_name       written as the output name
//
// Nothing is returned; a failure to create the parameter file is reported on
// stdout and the miner is not started.
void MineRules(char* szdataset_name, char* sztarget_attr, char* sztarget_value, int nmin_sup, int nmax_len, double dmax_pvalue, double dmax_local_pvalue, int neffect_size_method, double deffect_size, int ncorrection_method, int num_of_repetitions, int nseeding_method, char* szoutput_name)
{
	static const char szpara_filename[] = "ehta.para";
	//static const char szcmd[] = ".\\exec\\EHTA_syn ehta.para\n";
	static const char szcmd[] = ".\\ARminer ehta.para\n";
	const char *szcorrection = NULL;
	const char *szseeding = NULL;
	FILE *fp;

	fp = fopen(szpara_filename, "wt");
	if(fp==NULL)
	{
		// Report the file that is really being opened (the old message named asgen.conf).
		printf("Error: cannot open file %s for write\n", szpara_filename);
		return;
	}

	fprintf(fp, "data_file=%s.data\n", szdataset_name);
	fprintf(fp, "names_file=%s.names\n", szdataset_name);

	fprintf(fp, "materialization_mode=freq only\n");
	fprintf(fp, "signal_type=rule\n");
	fprintf(fp, "context_attributes=\n");
	fprintf(fp, "comparing_attributes=\n");
	fprintf(fp, "target_attribute=%s\n", sztarget_attr);
	fprintf(fp, "target_value=%s\n", sztarget_value);
	fprintf(fp, "test-statistic=fisher's exact test\n");
	//fprintf(fp, "test-statistic=x2\n");
	fprintf(fp, "compare_mode=pairwise\n");

	fprintf(fp, "min_sup=%d\n", nmin_sup);
	fprintf(fp, "max_len=%d\n", nmax_len);
	fprintf(fp, "max_pvalue=%f\n", dmax_pvalue);
	fprintf(fp, "max_local_pvalue=%f\n", dmax_local_pvalue);
	fprintf(fp, "effect_size_method=%s\n", neffect_size_method==0 ? "confidence" : "odds_ratio");
	fprintf(fp, "min_diff=%f\n", deffect_size);

	if(ncorrection_method>=0)
	{
		if(ncorrection_method==PERMUTATION)
			szcorrection = "permutation";
		else if(ncorrection_method==PERMUTATION_SUPERSET)
			szcorrection = "permutation_superset";
		else if(ncorrection_method==SIMULATED_PERM)
			szcorrection = "simulated_perm";
		else if(ncorrection_method==SIMULATED_PERM_SUBSET)
			szcorrection = "simulated_perm_subset";
		else if(ncorrection_method==SIMULATED_PERM_SUBSET_SIB)
			szcorrection = "simulated_perm_subset_sib";
		else if(ncorrection_method==SIMULATED_PERM_SUBSET_LEFT_SIB)
			szcorrection = "simulated_perm_subset_left_sib";
		else if(ncorrection_method==SIMULATED_PERM_SUBSET_ALLSIB)
			szcorrection = "simulated_perm_subset_allsib";
		else if(ncorrection_method==SIMULATED_PERM_SUPERSET)
			szcorrection = "simulated_perm_superset";
		else if(ncorrection_method==SIMULATED_PERM_HYBRID)
			szcorrection = "simulated_perm_hybrid";

		if(szcorrection!=NULL)
			fprintf(fp, "correction_method=%s\n", szcorrection);
		fprintf(fp, "num_of_repetitions=%d\n", num_of_repetitions);
	}

	if(nseeding_method>=0)
	{
		if(nseeding_method==SEEDING_GLOBAL)
			szseeding = "global";
		else if(nseeding_method==SEEDING_PER_RULE)
			szseeding = "per rule";
		else if(nseeding_method==SEEDING_PER_PERM)
			szseeding = "per perm";

		if(szseeding!=NULL)
			fprintf(fp, "seeding_method=%s\n", szseeding);
	}

	fprintf(fp, "data_perm_method=swap\n");
	fprintf(fp, "output_mode=all\n");
	//fprintf(fp, "output_mode=representative\n");
	fprintf(fp, "gen_tid_list=yes\n");
	fprintf(fp, "output=%s\n", szoutput_name);

	fclose(fp);

	// Echo the command with an explicit format so the command text is never
	// interpreted as a format string.
	printf("%s", szcmd);
	system(szcmd);
}

// Writes the parameter file "ehta.para" and then runs the external miner with
// the command ".\exec\EHTA ehta.para" through system().
//
// This overload always writes max_local_pvalue=0.05,
// effect_size_method=confidence and seeding_method=global, and additionally
// writes pvalue_buf_size and perm_diff_list.
//
// szdataset_name      base name; "<name>.data" and "<name>.names" are written
//                     as data_file / names_file
// sztarget_attr/value written as target_attribute / target_value
// nmin_sup, nmax_len  written as min_sup / max_len
// dmax_pvalue         written as max_pvalue
// deffect_size        written as min_diff
// ncorrection_method  negative: no correction settings are written; otherwise
//                     one of the correction-method constants, followed by
//                     num_of_repetitions (an unknown non-negative value writes
//                     num_of_repetitions only, as before)
// dpvalue_buf_size    written as pvalue_buf_size with three decimals
// bperm_diff_list     written as perm_diff_list=yes/no
// szoutput_name       written as the output name
//
// Nothing is returned; a failure to create the parameter file is reported on
// stdout and the miner is not started.
void MineRules(char* szdataset_name, char* sztarget_attr, char* sztarget_value, int nmin_sup, int nmax_len, double dmax_pvalue, double deffect_size, int ncorrection_method, int num_of_repetitions, double dpvalue_buf_size, bool bperm_diff_list, char* szoutput_name)
{
	static const char szpara_filename[] = "ehta.para";
	static const char szcmd[] = ".\\exec\\EHTA ehta.para\n";
	const char *szcorrection = NULL;
	FILE *fp;

	fp = fopen(szpara_filename, "wt");
	if(fp==NULL)
	{
		// Report the file that is really being opened (the old message named asgen.conf).
		printf("Error: cannot open file %s for write\n", szpara_filename);
		return;
	}

	fprintf(fp, "data_file=%s.data\n", szdataset_name);
	fprintf(fp, "names_file=%s.names\n", szdataset_name);

	fprintf(fp, "materialization_mode=freq only\n");
	fprintf(fp, "signal_type=rule\n");
	fprintf(fp, "context_attributes=\n");
	fprintf(fp, "comparing_attributes=\n");
	fprintf(fp, "target_attribute=%s\n", sztarget_attr);
	fprintf(fp, "target_value=%s\n", sztarget_value);
	fprintf(fp, "test-statistic=fisher's exact test\n");
	//fprintf(fp, "test-statistic=x2\n");

	fprintf(fp, "min_sup=%d\n", nmin_sup);
	fprintf(fp, "max_len=%d\n", nmax_len);
	fprintf(fp, "max_pvalue=%f\n", dmax_pvalue);
	fprintf(fp, "max_local_pvalue=0.05\n");
	fprintf(fp, "effect_size_method=confidence\n");
	fprintf(fp, "min_diff=%f\n", deffect_size);

	if(ncorrection_method>=0)
	{
		if(ncorrection_method==PERMUTATION)
			szcorrection = "permutation";
		else if(ncorrection_method==PERMUTATION_SUPERSET)
			szcorrection = "permutation_superset";
		else if(ncorrection_method==SIMULATED_PERM)
			szcorrection = "simulated_perm";
		else if(ncorrection_method==SIMULATED_PERM_SUBSET)
			szcorrection = "simulated_perm_subset";
		else if(ncorrection_method==SIMULATED_PERM_SUBSET_SIB)
			szcorrection = "simulated_perm_subset_sib";
		else if(ncorrection_method==SIMULATED_PERM_SUBSET_LEFT_SIB)
			szcorrection = "simulated_perm_subset_left_sib";
		else if(ncorrection_method==SIMULATED_PERM_SUBSET_ALLSIB)
			szcorrection = "simulated_perm_subset_allsib";
		else if(ncorrection_method==SIMULATED_PERM_SUPERSET)
			szcorrection = "simulated_perm_superset";
		else if(ncorrection_method==SIMULATED_PERM_HYBRID)
			szcorrection = "simulated_perm_hybrid";

		if(szcorrection!=NULL)
			fprintf(fp, "correction_method=%s\n", szcorrection);
		fprintf(fp, "num_of_repetitions=%d\n", num_of_repetitions);
	}

	fprintf(fp, "seeding_method=global\n");
	fprintf(fp, "data_perm_method=swap\n");

	fprintf(fp, "pvalue_buf_size=%.3f\n", dpvalue_buf_size);
	fprintf(fp, "perm_diff_list=%s\n", bperm_diff_list ? "yes" : "no");

	fprintf(fp, "output_mode=all\n");
	//fprintf(fp, "output_mode=representative\n");
	fprintf(fp, "output=%s\n", szoutput_name);

	fclose(fp);

	// Echo the command with an explicit format so the command text is never
	// interpreted as a format string.
	printf("%s", szcmd);
	system(szcmd);
}


// Reads "<szoutput_name>.cfptree.stat" and updates the globals gndb_size,
// gnmin_sup, gnmax_sup (values 1-3 of the file) and gntgt_sup (value 14).
// The file must hold 14 whitespace-separated integers; values 4-13 are parsed
// and discarded. According to the original note this yields the dataset size
// and the number of positives in the dataset.
//
// The globals are only updated when all 14 values were parsed, so a missing,
// truncated or malformed file leaves them unchanged; the problem is reported
// on stdout.
void ReadTreeStatis(char* szoutput_name)
{
	enum { NUM_OF_FIELDS = 14 };
	FILE *fp;
	int nfields[NUM_OF_FIELDS];
	int nfields_read;
	// Built as a std::string so a long output name cannot overflow a fixed buffer.
	const std::string strcfp_stat_filename = std::string(szoutput_name) + ".cfptree.stat";

	fp = fopen(strcfp_stat_filename.c_str(), "rt");
	if(fp==NULL)
	{
		printf("Error: cannot open file %s for read\n", strcfp_stat_filename.c_str());
		return;
	}

	for(nfields_read=0;nfields_read<NUM_OF_FIELDS;nfields_read++)
	{
		if(fscanf(fp, "%d", &nfields[nfields_read])!=1)
			break;
	}
	fclose(fp);

	if(nfields_read<NUM_OF_FIELDS)
	{
		printf("Error: file %s holds only %d of the %d expected values\n", strcfp_stat_filename.c_str(), nfields_read, (int)NUM_OF_FIELDS);
		return;
	}

	gndb_size = nfields[0];
	gnmin_sup = nfields[1];
	gnmax_sup = nfields[2];
	gntgt_sup = nfields[NUM_OF_FIELDS-1];
}

// Returns the first floating-point value stored in "rules.time.txt".
// Returns -1 (after printing an error) when the file cannot be opened or does
// not start with a number; previously the latter case returned an
// uninitialized value.
double GetRunTime()
{
	FILE *fp;
	double drun_time;

	fp = fopen("rules.time.txt", "rt");
	if(fp==NULL)
	{
		printf("Error: cannot open file rules.time.txt for read\n");
		return -1;
	}

	if(fscanf(fp, "%lf", &drun_time)!=1)
	{
		printf("Error: cannot read the run time from file rules.time.txt\n");
		drun_time = -1;
	}
	fclose(fp);

	return drun_time;
}

int LoadMinedRules(char* szoutput_name, ASSOCRULE * &prules, int *& pitemset_buf, int * &ptidlist_buf, int &nmax_rule_len)
{
	FILE *fp;
	char szrule_filename[200];
	int npreorder, npat_len, nitem, nsup, ntgt_sup, i;
	double dscore, dpvalue, dadjusted_pvalue, dcond_pvalue;
	int num_of_rules, npat_buf_size, npat_buf_pos, ntidlist_buf_size, ntidlist_buf_pos, nclass_no;

	sprintf(szrule_filename, "%s.rules.txt", szoutput_name);
	fp = fopen(szrule_filename, "rt");
	if(fp==NULL)
	{
		printf("Error: cannot open file %s for read\n", szrule_filename);
		return 0;
	}

	num_of_rules = 0;
	npat_buf_size = 0;
	ntidlist_buf_size = 0;
	nmax_rule_len = 0;

	fscanf(fp, "%d", &npreorder);
	while(!feof(fp))
	{
		fscanf(fp, "%d", &npat_len);
		if(nmax_rule_len<npat_len)
			nmax_rule_len = npat_len;

		for(i=0;i<npat_len;i++)
			fscanf(fp, "%d", &nitem);
		fscanf(fp, "%d", &nsup);
		if(gnum_of_classes>2)
			fscanf(fp, "%d", &nclass_no);
		fscanf(fp, "%d", &ntgt_sup);

		fscanf(fp, "%lf", &dscore);
		fscanf(fp, "%lf", &dpvalue);
		fscanf(fp, "%lf", &dadjusted_pvalue);
		fscanf(fp, "%lf", &dcond_pvalue);

		num_of_rules++;
		npat_buf_size += npat_len;
		ntidlist_buf_size += nsup;

		fscanf(fp, "%d", &npreorder);
	}
	rewind(fp);

	prules = new ASSOCRULE[num_of_rules];
	pitemset_buf = new int[npat_buf_size];
	ptidlist_buf = new int[ntidlist_buf_size];
	num_of_rules = 0;

	npat_buf_pos = 0;
	ntidlist_buf_pos = 0;
	fscanf(fp, "%d", &npreorder);
	while(!feof(fp))
	{
		prules[num_of_rules].npreorder = npreorder;
		fscanf(fp, "%d", &prules[num_of_rules].npat_len);

		prules[num_of_rules].pattern = &pitemset_buf[npat_buf_pos];
		npat_buf_pos += prules[num_of_rules].npat_len;
		for(i=0;i<prules[num_of_rules].npat_len;i++)
			fscanf(fp, "%d", &prules[num_of_rules].pattern[i]);
		qsort(prules[num_of_rules].pattern, prules[num_of_rules].npat_len, sizeof(int), comp_int);

		fscanf(fp, "%d", &prules[num_of_rules].nsup);
		if(gnum_of_classes>2)
			fscanf(fp, "%d", &prules[num_of_rules].nclass_no);
		else
			prules[num_of_rules].nclass_no = 1;
		fscanf(fp, "%d", &prules[num_of_rules].ntgt_sup);
		prules[num_of_rules].ptid_list = &ptidlist_buf[ntidlist_buf_pos];
		ntidlist_buf_pos += prules[num_of_rules].nsup;

		fscanf(fp, "%lf", &prules[num_of_rules].dscore);
		fscanf(fp, "%lf", &prules[num_of_rules].dpvalue);
		fscanf(fp, "%lf", &prules[num_of_rules].dadjusted_pvalue);
		fscanf(fp, "%lf", &prules[num_of_rules].dcond_pvalue);
		num_of_rules++;

		fscanf(fp, "%d", &npreorder);
	}
	fclose(fp);

	if(npat_buf_pos!=npat_buf_size)
		printf("Error: inconsistent pattern buffer size\n");
	if(ntidlist_buf_pos!=ntidlist_buf_size)
		printf("Error: inconsistent tid list buffer size\n");

	return num_of_rules;
}

// Reads "<szoutput_name>.tgtvalues.map" and stores every whitespace-separated
// token in *ptgtvalue_map, mapped to its zero-based position in the file
// (the class number).
//
// Tokens are limited to 99 characters so the read buffer cannot overflow; a
// longer token is split into several entries. The last token is also loaded
// when the file does not end with a newline (the old feof-driven loop dropped
// it). The old "empty line" branch is gone because a successful %s conversion
// never yields an empty string.
//
// If the file cannot be opened an error is printed and the map is left unchanged.
void LoadTgtValueMap(char* szoutput_name, std::map<std::string, int> *ptgtvalue_map)
{
	FILE *fp;
	char sztgt_value[100];
	int nclass_no;
	// Built as a std::string so a long output name cannot overflow a fixed buffer.
	const std::string strtgtmap_filename = std::string(szoutput_name) + ".tgtvalues.map";

	fp = fopen(strtgtmap_filename.c_str(), "rt");
	if(fp==NULL)
	{
		printf("Error: cannot open file %s for read\n", strtgtmap_filename.c_str());
		return;
	}

	nclass_no = 0;
	// The field width keeps one byte free for the terminating NUL.
	while(fscanf(fp, "%99s", sztgt_value)==1)
	{
		(*ptgtvalue_map)[sztgt_value] = nclass_no;
		nclass_no++;
	}
	fclose(fp);
}

// Loads the pattern directory stored in the binary file
// "<szoutput_name>.tidlist.dir".
//
// The file starts with four ints (total entries, number of lists, maximum DFS
// list length, maximum tid-list length; only the first one is used here),
// followed by that many PAT_DIR_NODE records.
//
// ppat_dir_nodes receives an array allocated with new[] that the caller must
//                release with delete[]; it is set to NULL on any failure.
//
// Returns the number of directory entries, or 0 when the file cannot be
// opened, its header is invalid, or it holds fewer records than announced.
int LoadPatDirNodes(char* szoutput_name, PAT_DIR_NODE *&ppat_dir_nodes)
{
	FILE *fp;
	int ntotal_entries, num_of_lists, nmax_dfs_list_len, nmax_tidlist_len;
	size_t nentries_read;
	// Built as a std::string so a long output name cannot overflow a fixed buffer.
	const std::string strdir_filename = std::string(szoutput_name) + ".tidlist.dir";

	// Give the caller a well-defined pointer on every failure path.
	ppat_dir_nodes = NULL;

	fp = fopen(strdir_filename.c_str(), "rb");
	if(fp==NULL)
	{
		printf("Error: cannot open file %s for read\n", strdir_filename.c_str());
		return 0;
	}

	if(fread(&ntotal_entries, sizeof(int), 1, fp)!=1
		|| fread(&num_of_lists, sizeof(int), 1, fp)!=1
		|| fread(&nmax_dfs_list_len, sizeof(int), 1, fp)!=1
		|| fread(&nmax_tidlist_len, sizeof(int), 1, fp)!=1
		|| ntotal_entries<0)
	{
		// Without this check a short file left ntotal_entries uninitialized
		// and it was then used as the allocation size.
		printf("Error: invalid header in file %s\n", strdir_filename.c_str());
		fclose(fp);
		return 0;
	}

	ppat_dir_nodes = new PAT_DIR_NODE[ntotal_entries];
	nentries_read = fread(ppat_dir_nodes, sizeof(PAT_DIR_NODE), ntotal_entries, fp);
	fclose(fp);

	if(nentries_read!=(size_t)ntotal_entries)
	{
		printf("Error: file %s holds fewer directory entries than its header announces\n", strdir_filename.c_str());
		delete []ppat_dir_nodes;
		ppat_dir_nodes = NULL;
		return 0;
	}

	return ntotal_entries;
}


// Reads gndb_size int-sized values from the binary file
// "<szoutput_name>.tgtvalues" into ptgt_values.
//
// ptgt_values must point to storage for at least gndb_size ints, so gndb_size
// has to be set before this is called (ReadTreeStatis() sets it). Nothing is
// read when gndb_size is not positive. A missing or too-short file is
// reported on stdout; with a short file the remaining elements keep their
// previous contents.
void LoadTgtValues(char* szoutput_name, void *ptgt_values)
{
	FILE *fp;
	size_t nvalues_read;
	// Built as a std::string so a long output name cannot overflow a fixed buffer.
	const std::string strtgt_filename = std::string(szoutput_name) + ".tgtvalues";

	fp = fopen(strtgt_filename.c_str(), "rb");
	if(fp==NULL)
	{
		printf("Error: cannot open file %s for read\n", strtgt_filename.c_str());
		return;
	}

	// A negative size would turn into a huge unsigned element count.
	if(gndb_size>0)
	{
		nvalues_read = fread(ptgt_values, sizeof(int), gndb_size, fp);
		if(nvalues_read!=(size_t)gndb_size)
			printf("Error: file %s holds only %d of the %d expected target values\n", strtgt_filename.c_str(), (int)nvalues_read, gndb_size);
	}

	fclose(fp);
}

// Reads one length-prefixed tid list from an open binary file.
//
// fp         file opened for binary reading
// ndisk_pos  byte offset of the list: one int holding the list length,
//            followed by that many int tids
// ptid_list  receives the tids; it must be large enough for the stored list
//            (the capacity is not known here and cannot be checked)
//
// Returns the number of tids read. Returns 0 (after printing an error) when
// the position cannot be reached or the length cannot be read or is negative;
// previously an unread length was used uninitialized as the element count.
// A truncated list is reported and the number of tids actually read is returned.
int LoadOneTidList(FILE *fp, int ndisk_pos, int *ptid_list)
{
	int nlist_len;
	size_t ntids_read;

	// Only seek when the file is not already positioned at the list, as before.
	if(ftell(fp)!=(long)ndisk_pos && fseek(fp, (long)ndisk_pos, SEEK_SET)!=0)
	{
		printf("Error: cannot seek to tid list position %d\n", ndisk_pos);
		return 0;
	}

	nlist_len = 0;
	if(fread(&nlist_len, sizeof(int), 1, fp)!=1 || nlist_len<0)
	{
		printf("Error: cannot read the tid list length at position %d\n", ndisk_pos);
		return 0;
	}

	ntids_read = fread(ptid_list, sizeof(int), nlist_len, fp);
	if(ntids_read!=(size_t)nlist_len)
		printf("Error: the tid list at position %d is truncated\n", ndisk_pos);

	return (int)ntids_read;
}


// Reconstructs the tid list of the directory entry npreorder into
// ppat_tid_list and returns its length.
//
// fp              open tid-list file, read through LoadOneTidList()
// ppat_dir_nodes  pattern directory; nflag, ntidlist_pos and nparent_preorder
//                 of the visited entries are used
// npreorder       index of the entry whose tid list is wanted
// ppat_tid_list   receives the resulting tid list
// ptemp_list1..3  scratch buffers supplied by the caller; ptemp_list1 is read
//                 with length 0 on the first union, so its initial contents
//                 do not matter
//
// An entry without the diff-list flag stores its tid list directly. An entry
// with the flag stores a list that is combined with the lists of its
// ancestors: the stored lists are united while walking up the parent chain,
// and the union is finally subtracted from the first ancestor list that is
// not a diff list, or from the full range 0..gndb_size-1 when the walk ends
// at parent index -1.
int GetOneTidList(FILE *fp, PAT_DIR_NODE *ppat_dir_nodes, int npreorder, int *ppat_tid_list, int *ptemp_list1, int *ptemp_list2, int *ptemp_list3)
{
	int npat_tidlist_len, ndiff_list_len, nunion_len, ndiff_len, *ptemp_list, i, j;

	npat_tidlist_len = 0;

	if(ppat_dir_nodes[npreorder].nflag & (1<<DIFF_LIST_FLAG_BITPOS)) // the entry stores a diff list
	{
		// ptemp_list1 always holds the union accumulated so far (ndiff_list_len tids).
		ndiff_list_len = 0;
		while(npreorder>=0 && (ppat_dir_nodes[npreorder].nflag & (1<<DIFF_LIST_FLAG_BITPOS)))
		{
			// Load this entry's diff list and merge it into the accumulated union.
			npat_tidlist_len = LoadOneTidList(fp, ppat_dir_nodes[npreorder].ntidlist_pos, ptemp_list3);
			nunion_len = get_union(ndiff_list_len, ptemp_list1, npat_tidlist_len, ptemp_list3, ptemp_list2);
			if(nunion_len!=ndiff_list_len+npat_tidlist_len)
				printf("Error: there should no no overlap between two consecutive diff-list\n");
			ndiff_list_len = nunion_len;
			// The union was written to ptemp_list2; swap the two local pointers
			// so that ptemp_list1 names the accumulated union again.
			ptemp_list = ptemp_list2;
			ptemp_list2 = ptemp_list1;
			ptemp_list1 = ptemp_list;
			// Move to the parent, then skip ancestors whose closed flag is not set.
			npreorder = ppat_dir_nodes[npreorder].nparent_preorder;
			while(npreorder>=0 && (ppat_dir_nodes[npreorder].nflag & (1<<CLOSED_FLAG_BITPOS))==0)
				npreorder = ppat_dir_nodes[npreorder].nparent_preorder;
		}
		if(npreorder>=0)
		{
			// Reached an ancestor that is not a diff list:
			// result = ancestor list minus the accumulated union.
			npat_tidlist_len = LoadOneTidList(fp, ppat_dir_nodes[npreorder].ntidlist_pos, ptemp_list3);
			ndiff_len = get_diffset(npat_tidlist_len, ptemp_list3, ndiff_list_len, ptemp_list1, ppat_tid_list);
			if(ndiff_len!=npat_tidlist_len-ndiff_list_len)
				printf("Error: the tid list length should be the difference between the parent list length and the diff-list length\n");
			npat_tidlist_len = ndiff_len;
		}
		else if(npreorder==-1)
		{
			// The walk ran past the top of the chain: the result is every tid
			// in 0..gndb_size-1 that is not in the accumulated union.
			i = 0;
			j = 0;
			npat_tidlist_len = 0;
			while(i<gndb_size && j<ndiff_list_len)
			{
				if(i==ptemp_list1[j])
				{
					i++;
					j++;
				}
				else if(i<ptemp_list1[j])
					ppat_tid_list[npat_tidlist_len++] = i++;
				else
					j++;
			}
			while(i<gndb_size)
				ppat_tid_list[npat_tidlist_len++] = i++;
		}
		else
			printf("Error: the preorder of the entry cannot be -2\n");
	}
	else
		// The entry stores its complete tid list.
		npat_tidlist_len = LoadOneTidList(fp, ppat_dir_nodes[npreorder].ntidlist_pos, ppat_tid_list);
	
	return npat_tidlist_len;
}


void LoadMinedTidList(char *szoutput_name, ASSOCRULE *prules, int num_of_rules)
{
	FILE *fp;
	char sztidlist_filename[200];
	int ntotal_entries, i, ntidlist_len, *ptemp_list1, *ptemp_list2, *ptemp_list3;
	PAT_DIR_NODE *ppat_dir_nodes;

	ntotal_entries = LoadPatDirNodes(szoutput_name, ppat_dir_nodes);

	sprintf(sztidlist_filename, "%s.tidlist", szoutput_name);
	fp = fopen(sztidlist_filename, "rb");
	if(fp==NULL)
	{
		printf("Error: cannot open file %s for read\n", sztidlist_filename);
		return;
	}

	ptemp_list1 = new int[gndb_size];
	ptemp_list2 = new int[gndb_size];
	ptemp_list3 = new int[gndb_size];

	for(i=0;i<num_of_rules;i++)
	{
		ntidlist_len =  GetOneTidList(fp, ppat_dir_nodes, prules[i].npreorder, prules[i].ptid_list, ptemp_list1, ptemp_list2, ptemp_list3);
		if(ntidlist_len!=prules[i].nsup)
			printf("Error: inconsistent pattern support\n");

	}
	fclose(fp);
	
	delete []ptemp_list1;
	delete []ptemp_list2;
	delete []ptemp_list3;

	delete []ppat_dir_nodes;
}


void LoadSignRuleNums(char* szoutput_name, SIGN_RULE_NUM *psign_rule_nums)
{
	FILE *fp;
	char szoutput_filename[200];

	sprintf(szoutput_filename, "%s.rulenum.txt", szoutput_name);
	fp = fopen(szoutput_filename, "rt");
	if(fp==NULL)
	{
		printf("Error: cannot open file %s for read\n", szoutput_filename);
		return;
	}

	fscanf(fp, "%d", &gnum_of_tests);
	fscanf(fp, "%lf", &gdmax_pvalue);
	psign_rule_nums->dpvalue_thres = gdmax_pvalue;
	psign_rule_nums->dBC_pvalue_thres = gdmax_pvalue/gnum_of_tests;
	fscanf(fp, "%lf", &psign_rule_nums->dBH_pvalue_thres);
	fscanf(fp, "%lf", &gdpermFWER_pvalue_thres);
	psign_rule_nums->dperm_FWER_thres = gdpermFWER_pvalue_thres;
	fscanf(fp, "%lf", &psign_rule_nums->dpermBH_pvalue_thres);

	fscanf(fp, "%d", &psign_rule_nums->num_of_sign_rules);
	fscanf(fp, "%d", &psign_rule_nums->num_of_BCsign_rules);
	fscanf(fp, "%d", &psign_rule_nums->num_of_BHsign_rules);
	fscanf(fp, "%d", &psign_rule_nums->num_of_perm_sign_rules);
	fscanf(fp, "%d", &psign_rule_nums->num_of_perm_BCsign_rules);
	fscanf(fp, "%d", &psign_rule_nums->num_of_perm_BHsign_rules);
	fscanf(fp, "%d", &psign_rule_nums->num_of_permFWER_rules);

	fclose(fp);
}

// Extracts the p-values of one permutation from the binary file
// "<szoutput_name>.perm" and writes them, one per line in "%.3E" format, to
// szoutput_filename.
//
// The .perm file is read as gnum_of_tests consecutive records of
// num_of_permutations doubles each; gnum_of_tests is refreshed first by
// loading "<szoutput_name>.sorted.rulenum.txt" through LoadSignRuleNums().
//
// szoutput_name       base name of the miner output
// num_of_permutations number of doubles per record; must be positive
// nperm_no            zero-based index of the permutation to extract; must be
//                     smaller than num_of_permutations
// szoutput_filename   text file to create
//
// Invalid arguments, files that cannot be opened and a truncated .perm file
// are reported on stdout; with a truncated file the lines read so far are kept.
void GetPermPvalues(char* szoutput_name, int num_of_permutations, int nperm_no, char *szoutput_filename)
{
	FILE *fp, *fpout;
	SIGN_RULE_NUM thesign_rule_num;
	int i;
	// Built as std::string so a long output name cannot overflow a fixed buffer.
	const std::string strrule_output_name = std::string(szoutput_name) + ".sorted";
	const std::string strperm_filename = std::string(szoutput_name) + ".perm";

	// LoadSignRuleNums() only reads the name; the cast adapts to its non-const parameter.
	LoadSignRuleNums(const_cast<char*>(strrule_output_name.c_str()), &thesign_rule_num);

	// The index is used on a buffer of num_of_permutations doubles below.
	if(num_of_permutations<=0 || nperm_no<0 || nperm_no>=num_of_permutations)
	{
		printf("Error: permutation no. %d is outside the range [0, %d)\n", nperm_no, num_of_permutations);
		return;
	}

	fp = fopen(strperm_filename.c_str(), "rb");
	if(fp==NULL)
	{
		printf("Error: cannot open file %s for read\n", strperm_filename.c_str());
		return;
	}
	fpout = fopen(szoutput_filename, "wt");
	if(fpout==NULL)
	{
		printf("Error: cannot open file %s for write\n", szoutput_filename);
		fclose(fp); // previously leaked on this path
		return;
	}

	std::vector<double> pvalues(num_of_permutations);
	for(i=0;i<gnum_of_tests;i++)
	{
		if(fread(&pvalues[0], sizeof(double), num_of_permutations, fp)!=(size_t)num_of_permutations)
		{
			printf("Error: file %s holds only %d of the %d expected records\n", strperm_filename.c_str(), i, gnum_of_tests);
			break;
		}
		fprintf(fpout, "%.3E\n", pvalues[nperm_no]);
	}
	fclose(fp);
	fclose(fpout);
}

// Merges two ascending int arrays and writes the values present in both to
// presult_set (in ascending order). Returns the number of values written.
int get_intersection(int nlen1, int *pset1, int nlen2, int *pset2, int *presult_set)
{
	int ncommon = 0;

	for(int a = 0, b = 0; a<nlen1 && b<nlen2; )
	{
		const int nleft = pset1[a];
		const int nright = pset2[b];

		if(nleft<nright)
			++a;
		else if(nright<nleft)
			++b;
		else
		{
			// Present in both inputs: keep it and advance both cursors.
			presult_set[ncommon++] = nleft;
			++a;
			++b;
		}
	}

	return ncommon;
}

// Returns how many values two ascending int arrays have in common, without
// storing the common values.
int get_intersection_size(int nlen1, int *pset1, int nlen2, int *pset2)
{
	int ncommon = 0;

	for(int a = 0, b = 0; a<nlen1 && b<nlen2; )
	{
		const int nleft = pset1[a];
		const int nright = pset2[b];

		if(nleft<nright)
			++a;
		else if(nright<nleft)
			++b;
		else
		{
			++ncommon;
			++a;
			++b;
		}
	}

	return ncommon;
}


// Merges two ascending int arrays into presult_set, writing a value that
// occurs in both inputs only once. Returns the number of values written.
// presult_set needs room for up to nlen1+nlen2 values.
int get_union(int nlen1, int *pset1, int nlen2, int *pset2, int *presult_set)
{
	int a = 0, b = 0, nout = 0;

	for(; a<nlen1 && b<nlen2; ++nout)
	{
		const int nleft = pset1[a];
		const int nright = pset2[b];

		if(nleft<nright)
		{
			presult_set[nout] = nleft;
			++a;
		}
		else if(nright<nleft)
		{
			presult_set[nout] = nright;
			++b;
		}
		else
		{
			// Equal heads: emit once, consume from both inputs.
			presult_set[nout] = nleft;
			++a;
			++b;
		}
	}

	// At most one of the inputs still has values left; append them.
	for(; a<nlen1; ++a)
		presult_set[nout++] = pset1[a];
	for(; b<nlen2; ++b)
		presult_set[nout++] = pset2[b];

	return nout;
}


// Writes to presult_set the values of the ascending array pset1 that do not
// occur in the ascending array pset2. Returns the number of values written.
int get_diffset(int nlen1, int *pset1, int nlen2, int *pset2, int *presult_set)
{
	int a = 0, b = 0, nkept = 0;

	while(a<nlen1 && b<nlen2)
	{
		const int nleft = pset1[a];
		const int nright = pset2[b];

		if(nleft>nright)
		{
			// pset2 is behind; catch up without emitting anything.
			++b;
			continue;
		}
		if(nleft<nright)
			presult_set[nkept++] = nleft;
		else
			++b; // equal: the value is removed, so consume it from pset2 as well
		++a;
	}

	// Everything left in pset1 has no counterpart in pset2.
	for(; a<nlen1; ++a)
		presult_set[nkept++] = pset1[a];

	return nkept;
}

// Returns the first integer stored in "<szoutput_name>.sorted.rulenum.txt",
// i.e. the number of tests written by the miner.
// Returns 0 (after printing an error) when the file cannot be opened or does
// not start with an integer; previously the latter case returned an
// uninitialized value.
int GetTestNum(char* szoutput_name)
{
	FILE *fp;
	int num_of_tests;
	// Built as a std::string so a long output name cannot overflow a fixed buffer.
	const std::string strrulenum_filename = std::string(szoutput_name) + ".sorted.rulenum.txt";

	fp = fopen(strrulenum_filename.c_str(), "rt");
	if(fp==NULL)
	{
		printf("Error: cannot open file %s for read\n", strrulenum_filename.c_str());
		return 0;
	}

	if(fscanf(fp, "%d", &num_of_tests)!=1)
	{
		printf("Error: cannot read the number of tests from file %s\n", strrulenum_filename.c_str());
		num_of_tests = 0;
	}

	fclose(fp);

	return num_of_tests;
}

// Loads column nrun_no of ppmin_pvalues from the text file
// "<szoutput_name>.perm.minpvalues".
//
// Every row of the file holds "<support> <confidence> <p-value>"; the p-value
// of row r is stored in ppmin_pvalues[r][nrun_no].
//
// ppmin_pvalues      array of num_of_repetitions row pointers
// num_of_repetitions expected number of rows; rows beyond it are counted but
//                    not stored (previously they were written past the array)
// nrun_no            column to fill
//
// A missing file, a truncated row and a row count different from
// num_of_repetitions are reported on stdout.
void LoadOneRunMinPvalues(char* szoutput_name, double **ppmin_pvalues, int num_of_repetitions, int nrun_no)
{
	FILE *fp;
	int nsup, nrow_no;
	double dconf, dpvalue;
	// Built as a std::string so a long output name cannot overflow a fixed buffer.
	const std::string strminpvalue_filename = std::string(szoutput_name) + ".perm.minpvalues";

	fp = fopen(strminpvalue_filename.c_str(), "rt");
	if(fp==NULL)
	{
		printf("Error: cannot open file %s for read\n", strminpvalue_filename.c_str());
		return;
	}

	nrow_no = 0;

	while(fscanf(fp, "%d", &nsup)==1)
	{
		if(fscanf(fp, "%lf", &dconf)!=1 || fscanf(fp, "%lf", &dpvalue)!=1)
		{
			printf("Error: row %d of file %s is incomplete\n", nrow_no+1, strminpvalue_filename.c_str());
			break;
		}

		if(nrow_no<num_of_repetitions)
			ppmin_pvalues[nrow_no][nrun_no] = dpvalue;
		nrow_no++;
	}
	fclose(fp);

	if(nrow_no!=num_of_repetitions)
		printf("Error: inconsistent number of permutations\n");
}

// Loads column nrun_no of pprule_pvalues from the text file
// "<szoutput_name>.pvalues".
//
// Every row of the file holds
// "<preorder> <support> <original p-value> <adjusted p-value>"; the adjusted
// p-value of row r is stored in pprule_pvalues[r][nrun_no].
//
// pprule_pvalues array of num_of_tests row pointers
// num_of_tests   expected number of rows; rows beyond it are counted but not
//                stored (previously they were written past the array)
// nrun_no        column to fill
//
// The preorder is read with "%d". The previous "%.lf" is not a valid scanf
// conversion and was paired with an int target, so the columns could not be
// parsed as intended.
//
// A missing file, a truncated row and a row count different from num_of_tests
// are reported on stdout.
void LoadOneRunAdjPvalues(char* szoutput_name, double **pprule_pvalues, int num_of_tests, int nrun_no)
{
	FILE *fp;
	int npreorder, nsup, nrow_no;
	double dorig_pvalue, dadj_pvalue;
	// Built as a std::string so a long output name cannot overflow a fixed buffer.
	const std::string strpvalue_filename = std::string(szoutput_name) + ".pvalues";

	fp = fopen(strpvalue_filename.c_str(), "rt");
	if(fp==NULL)
	{
		printf("Error: cannot open file %s for read\n", strpvalue_filename.c_str());
		return;
	}

	nrow_no = 0;

	while(fscanf(fp, "%d", &npreorder)==1)
	{
		if(fscanf(fp, "%d", &nsup)!=1
			|| fscanf(fp, "%lf", &dorig_pvalue)!=1
			|| fscanf(fp, "%lf", &dadj_pvalue)!=1)
		{
			printf("Error: row %d of file %s is incomplete\n", nrow_no+1, strpvalue_filename.c_str());
			break;
		}

		if(nrow_no<num_of_tests)
			pprule_pvalues[nrow_no][nrun_no] = dadj_pvalue;
		nrow_no++;
	}
	fclose(fp);

	if(nrow_no!=num_of_tests)
		printf("Error: inconsistent number of tests\n");
}

// qsort() comparator for doubles in ascending order:
// returns 1, -1 or 0 when *e1 is greater than, less than or neither
// greater nor less than *e2.
int comp_double(const void *e1, const void *e2)
{
	const double dleft = *(const double*) e1;
	const double dright = *(const double*) e2;

	// Each comparison yields 0 or 1, so the difference is -1, 0 or 1.
	return (dleft>dright) - (dleft<dright);
}

// Computes, for each of the num_of_rows rows of pppvalues, the average,
// sample standard deviation, minimum, maximum and median of its num_of_runs
// values and stores them in ppvalues_stat[row].
//
// Side effect: every row of pppvalues is sorted in ascending order.
// For an even number of runs the median is the lower of the two middle values.
// The standard deviation is reported as 0 when all values are equal, when
// there is a single run, or when rounding makes the variance non-positive.
// With num_of_runs<=0 every row gets the neutral statistics
// (avg 0, std dev 0, min 1, max 0, median 0); previously this divided by zero
// and read in front of the row.
void GetPvalueStat(double **pppvalues, int num_of_rows, int num_of_runs, PVALUE_STAT *ppvalues_stat)
{
	int i, j;

	for(i=0;i<num_of_rows;i++)
	{
		ppvalues_stat[i].davg = 0;
		ppvalues_stat[i].dstd_dev = 0;
		ppvalues_stat[i].dmin = 1;
		ppvalues_stat[i].dmax = 0;
		ppvalues_stat[i].dmedian = 0;

		if(num_of_runs<=0)
			continue;

		// dstd_dev temporarily accumulates the sum of squares.
		for(j=0;j<num_of_runs;j++)
		{
			ppvalues_stat[i].davg += pppvalues[i][j];
			ppvalues_stat[i].dstd_dev += pppvalues[i][j]*pppvalues[i][j];
		}
		ppvalues_stat[i].davg /= num_of_runs;

		// Sorting gives the minimum, maximum and median directly.
		qsort(pppvalues[i], num_of_runs, sizeof(double), comp_double);
		ppvalues_stat[i].dmin = pppvalues[i][0];
		ppvalues_stat[i].dmax = pppvalues[i][num_of_runs-1];
		ppvalues_stat[i].dmedian = pppvalues[i][(num_of_runs-1)/2];

		// Sum of squared deviations = sum(x^2) - n*avg^2.
		ppvalues_stat[i].dstd_dev = ppvalues_stat[i].dstd_dev-num_of_runs*ppvalues_stat[i].davg*ppvalues_stat[i].davg;
		if(ppvalues_stat[i].dmax>ppvalues_stat[i].dmin && num_of_runs>1 && ppvalues_stat[i].dstd_dev>0)
			ppvalues_stat[i].dstd_dev = sqrt(ppvalues_stat[i].dstd_dev/(num_of_runs-1));
		else
			ppvalues_stat[i].dstd_dev = 0;
	}
}

int comp_pvalue_stat(const void *e1, const void *e2)
{
	PVALUE_STAT *p1, *p2; 

	p1 = (PVALUE_STAT*) e1;
	p2 = (PVALUE_STAT*) e2;

	if(p1->davg < p2->davg)
		return -1;
	else if(p1->davg > p2->davg)
		return 1;
	else if(p1->dmedian < p2->dmedian)
		return -1;
	else if(p1->dmedian > p2->dmedian)
		return 1;
	else
		return 0;
}

// Sorts ppvalue_stat in place with comp_pvalue_stat and writes one
// tab-separated line per record (average, standard deviation, minimum,
// maximum, median, each in "%.3E" format) to szoutput_filename.
// If the file cannot be created an error is printed and the array is left unsorted.
void OutputPvalueStat(PVALUE_STAT *ppvalue_stat, int num_of_rows, char *szoutput_filename)
{
	FILE *fpout = fopen(szoutput_filename, "wt");
	if(!fpout)
	{
		printf("Error: cannot open file %s for write\n", szoutput_filename);
		return;
	}

	qsort(ppvalue_stat, num_of_rows, sizeof(PVALUE_STAT), comp_pvalue_stat);

	int nrow = 0;
	while(nrow<num_of_rows)
	{
		const PVALUE_STAT &row = ppvalue_stat[nrow++];
		fprintf(fpout, "%.3E\t%.3E\t%.3E\t%.3E\t%.3E\n", row.davg, row.dstd_dev, row.dmin, row.dmax, row.dmedian);
	}

	fclose(fpout);
}

// Runs the miner num_of_runs times with one correction method and summarizes
// how the resulting p-values vary between the runs.
//
// Every run calls MineRules() with the output name "temp" and then loads the
// per-permutation minimum p-values (LoadOneRunMinPvalues) and the per-rule
// adjusted p-values (LoadOneRunAdjPvalues) of that run. The number of rules is
// taken from GetTestNum() after the first run. Afterwards the statistics are
// written to "<szoutput_name>.minpvalue.stat" and
// "<szoutput_name>.adjpvalue.stat".
//
// The mining parameters are passed through to MineRules() unchanged.
// ncorrection_method  nothing is done when negative
// num_of_repetitions  number of rows of the minimum p-value table
// num_of_runs         number of miner runs; must be positive (previously a
//                     non-positive value used uninitialized variables)
//
// Both tables are zero-initialized so that a run whose result files cannot be
// loaded contributes zeros instead of indeterminate values.
void GetAvgPermPvalues(char* szdataset_name, char* sztarget_attr, char* sztarget_value, int nmin_sup, int nmax_len, double dmax_pvalue, double dmax_local_pvalue, int neffect_size_method, double deffect_size, int ncorrection_method, int num_of_repetitions, int nseeding_method, int num_of_runs, char* szoutput_name)
{
	char szrule_output_name[] = "temp";
	int i, j, num_of_tests;
	double **ppmin_pvalues, *pmin_pvalues_buf, **pprule_pvalues, *prule_pvalues_buf;
	PVALUE_STAT *pminpvalues_stat, *ppvalue_stat;
	// Built as std::string so a long output name cannot overflow a fixed buffer.
	const std::string strminpvalue_stat_filename = std::string(szoutput_name) + ".minpvalue.stat";
	const std::string stradjpvalue_stat_filename = std::string(szoutput_name) + ".adjpvalue.stat";

	if(ncorrection_method<0)
		return;
	if(num_of_runs<=0)
	{
		printf("Error: the number of runs must be positive\n");
		return;
	}

	// One row per permutation, one column per run.
	ppmin_pvalues = new double*[num_of_repetitions];
	pmin_pvalues_buf = new double[num_of_repetitions*num_of_runs]();
	for(i=0;i<num_of_repetitions;i++)
		ppmin_pvalues[i] = &pmin_pvalues_buf[i*num_of_runs];

	num_of_tests = 0;
	pprule_pvalues = NULL;
	prule_pvalues_buf = NULL;

	for(i=0;i<num_of_runs;i++)
	{
		printf("%d-th run\n", i+1);
		MineRules(szdataset_name, sztarget_attr, sztarget_value, nmin_sup, nmax_len, dmax_pvalue, dmax_local_pvalue, neffect_size_method, deffect_size, ncorrection_method, num_of_repetitions, nseeding_method, szrule_output_name);
		if(i==0)
		{
			// The rule table can only be sized once the first run is done:
			// one row per rule, one column per run.
			num_of_tests = GetTestNum(szrule_output_name);
			pprule_pvalues = new double*[num_of_tests];
			prule_pvalues_buf = new double[num_of_tests*num_of_runs]();
			for(j=0;j<num_of_tests;j++)
				pprule_pvalues[j] = &prule_pvalues_buf[j*num_of_runs];
		}
		LoadOneRunMinPvalues(szrule_output_name, ppmin_pvalues, num_of_repetitions, i);
		LoadOneRunAdjPvalues(szrule_output_name, pprule_pvalues, num_of_tests, i);
	}

	// OutputPvalueStat() only reads the file name; the casts adapt to its
	// non-const parameter.
	pminpvalues_stat = new PVALUE_STAT[num_of_repetitions];
	GetPvalueStat(ppmin_pvalues, num_of_repetitions, num_of_runs, pminpvalues_stat);
	OutputPvalueStat(pminpvalues_stat, num_of_repetitions, const_cast<char*>(strminpvalue_stat_filename.c_str()));

	delete []ppmin_pvalues;
	delete []pmin_pvalues_buf;
	delete []pminpvalues_stat;

	ppvalue_stat = new PVALUE_STAT[num_of_tests];
	GetPvalueStat(pprule_pvalues, num_of_tests, num_of_runs, ppvalue_stat);
	OutputPvalueStat(ppvalue_stat, num_of_tests, const_cast<char*>(stradjpvalue_stat_filename.c_str()));

	delete []pprule_pvalues;
	delete []prule_pvalues_buf;
	delete []ppvalue_stat;
}

void GetAvgPermPvalues(char* szdataset_name, char* sztarget_attr, char* sztarget_value, int nmin_sup, int nmax_len, double dmax_pvalue, double dmax_local_pvalue, int neffect_size_method, double deffect_size, int num_of_repetitions, int nseeding_method, int num_of_runs, char* szoutput_prefix)
{
	char szoutput_name[200];

	sprintf(szoutput_name, "%s.perm", szoutput_prefix);
	GetAvgPermPvalues(szdataset_name, sztarget_attr, sztarget_value, nmin_sup, nmax_len, dmax_pvalue, dmax_local_pvalue, neffect_size_method, deffect_size, PERMUTATION, num_of_repetitions, nseeding_method, num_of_runs, szoutput_name);
	//sprintf(szoutput_name, "%s.perm-superset", szoutput_prefix);
	//GetAvgPermPvalues(szdataset_name, sztarget_attr, sztarget_value, nmin_sup, nmax_len, dmax_pvalue, dmax_local_pvalue, neffect_size_method, deffect_size, PERMUTATION_SUPERSET, num_of_repetitions, nseeding_method, num_of_runs, szoutput_name);
	sprintf(szoutput_name, "%s.simu", szoutput_prefix);
	GetAvgPermPvalues(szdataset_name, sztarget_attr, sztarget_value, nmin_sup, nmax_len, dmax_pvalue, dmax_local_pvalue, neffect_size_method, deffect_size, SIMULATED_PERM, num_of_repetitions, nseeding_method, num_of_runs, szoutput_name);
	sprintf(szoutput_name, "%s.simu-subset", szoutput_prefix);
	GetAvgPermPvalues(szdataset_name, sztarget_attr, sztarget_value, nmin_sup, nmax_len, dmax_pvalue, dmax_local_pvalue, neffect_size_method, deffect_size, SIMULATED_PERM_SUBSET, num_of_repetitions, nseeding_method, num_of_runs, szoutput_name);
	//sprintf(szoutput_name, "%s.simu-subset-sib", szoutput_prefix);
	//GetAvgPermPvalues(szdataset_name, sztarget_attr, sztarget_value, nmin_sup, nmax_len, dmax_pvalue, dmax_local_pvalue, neffect_size_method, deffect_size, SIMULATED_PERM_SUBSET_SIB, num_of_repetitions, nseeding_method, num_of_runs, szoutput_name);
	sprintf(szoutput_name, "%s.simu-subset-leftsib", szoutput_prefix);
	GetAvgPermPvalues(szdataset_name, sztarget_attr, sztarget_value, nmin_sup, nmax_len, dmax_pvalue, dmax_local_pvalue, neffect_size_method, deffect_size, SIMULATED_PERM_SUBSET_LEFT_SIB, num_of_repetitions, nseeding_method, num_of_runs, szoutput_name);
	sprintf(szoutput_name, "%s.simu-subset-allsib", szoutput_prefix);
	GetAvgPermPvalues(szdataset_name, sztarget_attr, sztarget_value, nmin_sup, nmax_len, dmax_pvalue, dmax_local_pvalue, neffect_size_method, deffect_size, SIMULATED_PERM_SUBSET_ALLSIB, num_of_repetitions, nseeding_method, num_of_runs, szoutput_name);
	//sprintf(szoutput_name, "%s.simu-superset", szoutput_prefix);
	//GetAvgPermPvalues(szdataset_name, sztarget_attr, sztarget_value, nmin_sup, nmax_len, dmax_pvalue, dmax_local_pvalue, neffect_size_method, deffect_size, SIMULATED_PERM_SUPERSET, num_of_repetitions, nseeding_method, num_of_runs, szoutput_name);
	//sprintf(szoutput_name, "%s.simu-hybrid", szoutput_prefix);
	//GetAvgPermPvalues(szdataset_name, sztarget_attr, sztarget_value, nmin_sup, nmax_len, dmax_pvalue, dmax_local_pvalue, neffect_size_method, deffect_size, SIMULATED_PERM_HYBRID, num_of_repetitions, nseeding_method, num_of_runs, szoutput_name);

	//Merge8Files(szoutput_prefix, "minpvalue.stat");
	//Merge8Files(szoutput_prefix, "adjpvalue.stat");

}

// Merges up to eight per-method text files line by line into
// "<szprefix>.<szsuffix>".
//
// The inputs are "<szprefix>.<variant>.<szsuffix>" for the eight variants
// listed below. Output line k consists of line k of every input, each
// followed by a tab (a missing input contributes an empty column), and then
// the 1-based line number. Merging stops as soon as any opened input is
// exhausted. Nothing is written when no input can be opened.
//
// Lines are streamed character by character, so their length is not limited
// (the previous 500-byte line buffer overflowed on longer lines).
void Merge8Files(char *szprefix, char* szsuffix)
{
	static const char *const szvariants[8] =
	{
		"perm", "perm-superset", "simu", "simu-subset",
		"simu-subset-sib", "simu-subset-allsib", "simu-superset", "simu-hybrid"
	};
	FILE *fp[8], *fpout;
	int ch[8]; // look-ahead character per input; int so that EOF stays distinguishable
	int i, nline_no;
	bool beof;
	// Built as std::string so long names cannot overflow a fixed buffer.
	const std::string strbase = std::string(szprefix) + ".";
	const std::string stroutput_filename = strbase + szsuffix;

	fpout = fopen(stroutput_filename.c_str(), "wt");
	if(fpout==NULL)
	{
		printf("Error: cannot open file %s for write\n", stroutput_filename.c_str());
		return;
	}

	// Open the inputs and prime the look-ahead of every input that exists.
	beof = true;
	for(i=0;i<8;i++)
	{
		const std::string strinput_filename = strbase + szvariants[i] + "." + szsuffix;

		ch[i] = EOF;
		fp[i] = fopen(strinput_filename.c_str(), "rt");
		if(fp[i]==NULL)
			printf("Error: cannot open file %s for read\n", strinput_filename.c_str());
		else
		{
			ch[i] = fgetc(fp[i]);
			beof = false;
		}
	}

	nline_no = 0;
	while(!beof)
	{
		for(i=0;i<8;i++)
		{
			// A missing input has ch[i]==EOF, so fp[i] is never used for it.
			while(ch[i]!=EOF && ch[i]!='\n')
			{
				fputc(ch[i], fpout);
				ch[i] = fgetc(fp[i]);
			}
			fputc('\t', fpout);
			// Step over the line break so the look-ahead starts the next line.
			if(ch[i]=='\n')
				ch[i] = fgetc(fp[i]);
		}
		nline_no++;
		fprintf(fpout, "%d\n", nline_no);

		// Stop once any opened input has no more data.
		beof = false;
		for(i=0;i<8;i++)
		{
			if(fp[i]!=NULL && ch[i]==EOF)
			{
				beof = true;
				break;
			}
		}
	}
	fclose(fpout);
	for(i=0;i<8;i++)
	{
		if(fp[i]!=NULL)
			fclose(fp[i]);
	}
}

