#include <stdio.h>
#include <stdlib.h>

#include <iostream>

#include "global.h"

// Count the transaction ids shared by two tid lists.
//
// Both lists are walked in lock-step with one cursor each, advancing whichever
// cursor points at the smaller id; this only finds every common id when both
// lists are sorted in ascending order (assumed -- confirm against the loaders).
//
//   nlen1, ptid_list1 : length and contents of the first tid list
//   nlen2, ptid_list2 : length and contents of the second tid list
//   ptgt_values       : target value per transaction, indexed by tid
//   ntgt_class        : target value to count among the shared tids
//   ncmn_tgt_sup      : (out) number of shared tids whose target value equals ntgt_class
//
// Returns the number of tids present in both lists.
int GetCmnTgtSup(int nlen1, int *ptid_list1, int nlen2, int *ptid_list2, int *ptgt_values, int ntgt_class, int &ncmn_tgt_sup)
{
	int pos1 = 0;
	int pos2 = 0;
	int nshared = 0;

	ncmn_tgt_sup = 0;
	while(pos1<nlen1 && pos2<nlen2)
	{
		const int tid1 = ptid_list1[pos1];
		const int tid2 = ptid_list2[pos2];

		if(tid1<tid2)
		{
			pos1++;
			continue;
		}
		if(tid2<tid1)
		{
			pos2++;
			continue;
		}

		// same tid in both lists
		nshared++;
		if(ptgt_values[tid1]==ntgt_class)
			ncmn_tgt_sup++;
		pos1++;
		pos2++;
	}

	return nshared;
}

// Score how well a mined rule matches a true (embedded) rule using the values
// returned by CalcTwoTailedFisherPvalue.
//
//   pmined_rule : rule produced by the miner
//   ptrue_rule  : embedded rule to compare against
//   ptgt_values : target value per transaction, indexed by tid
//
// Returns 0 when the two rules predict different classes or share no
// transaction, 1 when their patterns (or their tid lists) are identical, and a
// CalcTwoTailedFisherPvalue result (or a product of two such results for a
// partial tid overlap) otherwise.
double MatchOneRule_prob(ASSOCRULE *pmined_rule, ASSOCRULE *ptrue_rule, int *ptgt_values)
{
	int nintersection_size, ncmn_sup, ncmn_tgt_sup;
	double dprob, dprob1, dprob2;

	// Rules of different classes never match; decide that before doing any
	// pattern work, as MatchOneRule_set and MatchOneRule_pvalue do.
	if(pmined_rule->nclass_no!=ptrue_rule->nclass_no)
		return 0;

	nintersection_size = get_intersection_size(pmined_rule->npat_len, pmined_rule->pattern, ptrue_rule->npat_len, ptrue_rule->pattern);

	if(nintersection_size==pmined_rule->npat_len) 
	{
		if(nintersection_size==ptrue_rule->npat_len) //the two rules are the same
			dprob = 1;
		else  //pmined_rule is a subset of ptrue_rule
			dprob = CalcTwoTailedFisherPvalue(pmined_rule->nsup, pmined_rule->ntgt_sup, ptrue_rule->nsup, ptrue_rule->ntgt_sup);
	}
	else if(nintersection_size==ptrue_rule->npat_len) //pmined_rule is a superset of ptrue_rule
	{
		dprob = CalcTwoTailedFisherPvalue(gndb_size-pmined_rule->nsup, gntgt_sup-pmined_rule->ntgt_sup, ptrue_rule->nsup-pmined_rule->nsup, ptrue_rule->ntgt_sup-pmined_rule->ntgt_sup);
	}
	else
	{
		// Neither pattern contains the other: compare the supporting
		// transaction sets instead of the patterns.
		ncmn_sup = GetCmnTgtSup(pmined_rule->nsup, pmined_rule->ptid_list, ptrue_rule->nsup, ptrue_rule->ptid_list, ptgt_values, ptrue_rule->nclass_no, ncmn_tgt_sup);
		if(ncmn_sup==0)
			dprob = 0;
		else if(ncmn_sup==ptrue_rule->nsup) 
		{
			if(ncmn_sup==pmined_rule->nsup) //T(ptrue_rule) is the same as T(pmined_rule)
				dprob = 1;
			else //T(ptrue_rule) is a subset of T(pmined_rule)
				dprob = CalcTwoTailedFisherPvalue(pmined_rule->nsup, pmined_rule->ntgt_sup, ptrue_rule->nsup, ptrue_rule->ntgt_sup);
		}
		else if(ncmn_sup==pmined_rule->nsup) //T(pmined_rule) is a subset of T(ptrue_rule)
		{
			dprob = CalcTwoTailedFisherPvalue(gndb_size-pmined_rule->nsup, gntgt_sup-pmined_rule->ntgt_sup, ptrue_rule->nsup-pmined_rule->nsup, ptrue_rule->ntgt_sup-pmined_rule->ntgt_sup);
		}
		else
		{
			// Partial overlap: combine the value for the shared part with the
			// value for the part of the true rule outside the mined rule.
			dprob1 = CalcTwoTailedFisherPvalue(pmined_rule->nsup, pmined_rule->ntgt_sup, ncmn_sup, ncmn_tgt_sup);
			dprob2 = CalcTwoTailedFisherPvalue(gndb_size-pmined_rule->nsup, gntgt_sup-pmined_rule->ntgt_sup, ptrue_rule->nsup-ncmn_sup, ptrue_rule->ntgt_sup-ncmn_tgt_sup);
			dprob = dprob1*dprob2;
		}
	}
	return dprob;
}

// Score how well a mined rule matches a true (embedded) rule as a ratio of
// supports.
//
//   pmined_rule : rule produced by the miner
//   ptrue_rule  : embedded rule to compare against
//   ptgt_values : target value per transaction, indexed by tid
//
// Returns 0 for rules of different classes. Otherwise the score is 1 when the
// patterns (or, failing pattern containment, the tid lists) coincide, a ratio
// of the two supports when one contains the other, and 0 when there is no
// containment at all. A diagnostic is printed if the score exceeds 1.
double MatchOneRule_set(ASSOCRULE *pmined_rule, ASSOCRULE *ptrue_rule, int *ptgt_values)
{
	int ncommon_items, nshared_tids, nshared_tgt_tids;
	double dscore;

	if(pmined_rule->nclass_no!=ptrue_rule->nclass_no)
		return 0;

	ncommon_items = get_intersection_size(pmined_rule->npat_len, pmined_rule->pattern, ptrue_rule->npat_len, ptrue_rule->pattern);

	// every item of the mined / true pattern also occurs in the other pattern
	const bool bmined_items_covered = (ncommon_items==pmined_rule->npat_len);
	const bool btrue_items_covered = (ncommon_items==ptrue_rule->npat_len);

	if(bmined_items_covered && btrue_items_covered)
	{
		// identical patterns
		dscore = 1;
	}
	else if(bmined_items_covered)
	{
		// mined pattern is a subset of the true pattern
		dscore = (double)ptrue_rule->nsup/pmined_rule->nsup;
	}
	else if(btrue_items_covered)
	{
		// mined pattern is a superset of the true pattern
		dscore = (double)pmined_rule->nsup/ptrue_rule->nsup;
	}
	else
	{
		// no pattern containment: fall back to comparing the tid lists
		nshared_tids = GetCmnTgtSup(pmined_rule->nsup, pmined_rule->ptid_list, ptrue_rule->nsup, ptrue_rule->ptid_list, ptgt_values, ptrue_rule->nclass_no, nshared_tgt_tids);

		const bool btrue_tids_covered = (nshared_tids==ptrue_rule->nsup);
		const bool bmined_tids_covered = (nshared_tids==pmined_rule->nsup);

		if(nshared_tids==0)
			dscore = 0;
		else if(btrue_tids_covered && bmined_tids_covered)
			dscore = 1;   // T(true) == T(mined)
		else if(btrue_tids_covered)
			dscore = (double)ptrue_rule->nsup/pmined_rule->nsup;   // T(true) inside T(mined)
		else if(bmined_tids_covered)
			dscore = (double)pmined_rule->nsup/ptrue_rule->nsup;   // T(mined) inside T(true)
		else
			dscore = 0;   // partial overlap only
	}

	if(dscore>1)
		printf("Error: the matching score cannot be larger than 1\n");

	return dscore;
}


// Derive match metrics from per-rule match scores.
//
//   pmined_rule_scores : best match score of each mined rule
//   num_of_mined_rules : number of entries of pmined_rule_scores to use
//   ptrue_rule_score   : best match score of each true rule
//   num_of_true_rules  : number of entries of ptrue_rule_score to use
//   pmatch_metrics     : (out) receives num_of_FPs, dFDR, dprecision, drecall, dF1
//
// A mined rule with score exactly 0 is counted as a false positive. Precision
// and recall are the mean scores of the mined and true rules respectively
// (0 when the corresponding list is empty).
void GetMatchMetrics(double *pmined_rule_scores, int num_of_mined_rules, double *ptrue_rule_score, int num_of_true_rules, MATCH_METRICS *pmatch_metrics)
{
	int k;
	int nunmatched = 0;

	// one pass over the mined rules: count the unmatched ones and sum the scores
	pmatch_metrics->dprecision = 0;
	for(k=0;k<num_of_mined_rules;k++)
	{
		if(pmined_rule_scores[k]==0)
			nunmatched++;
		pmatch_metrics->dprecision += pmined_rule_scores[k];
	}

	pmatch_metrics->num_of_FPs = nunmatched;
	if(num_of_mined_rules>0)
	{
		pmatch_metrics->dFDR = (double)nunmatched/num_of_mined_rules;
		pmatch_metrics->dprecision /= num_of_mined_rules;
	}
	else
		pmatch_metrics->dFDR = 0;

	pmatch_metrics->drecall = 0;
	for(k=0;k<num_of_true_rules;k++)
		pmatch_metrics->drecall += ptrue_rule_score[k];
	if(num_of_true_rules>0)
		pmatch_metrics->drecall /= num_of_true_rules;

	// harmonic mean; skipped only when both terms are zero (0/0)
	if(pmatch_metrics->drecall!=0.0 || pmatch_metrics->dprecision!=0.0)
		pmatch_metrics->dF1 = 2*pmatch_metrics->drecall*pmatch_metrics->dprecision/(pmatch_metrics->dprecision+pmatch_metrics->drecall);
	else
		pmatch_metrics->dF1 = 0;
}

void OutputMatchStat(char* szdataset_name, char *szoutput_name, double dmatch_thres, SIGN_RULE_NUM *psign_rule_nums, MATCH_STAT *pmatch_stat, char *szoutput_filename)
{
	FILE *fp;

	fp = fopen(szoutput_filename, "a+");
	if(fp==NULL)
	{
		printf("Error: cannot open file %s for appending\n", szoutput_filename);
		return;
	}

	fprintf(fp, "%s %s\t", szdataset_name, szoutput_name);
	fprintf(fp, "%d %d %d %d %.3f\t", gndataset_instant_num, gndataset_attr_num, gndataset_rule_num, gndataset_rule_sup, gddataset_rule_conf);
	fprintf(fp, "%d %.3f %d %d\t", gnmine_min_sup, gdmine_min_conf, gnum_of_tests, gnum_of_holdout_exp_tests);
	fprintf(fp, "%.3f ", dmatch_thres);
	if(gnum_of_tests>0)
		fprintf(fp, "%.3E %.3E %.3E %.3E %.3E\t", gdmax_pvalue, gdmax_pvalue/gnum_of_tests, psign_rule_nums->dBH_pvalue_thres, gdpermFWER_pvalue_thres, psign_rule_nums->dpermBH_pvalue_thres);
	else
		fprintf(fp, "%.3E %.3E %.3E %.3E %.3E\t", gdmax_pvalue, 0, psign_rule_nums->dBH_pvalue_thres, gdpermFWER_pvalue_thres, psign_rule_nums->dpermBH_pvalue_thres);
	fprintf(fp, "%d %d %d  ", psign_rule_nums->num_of_sign_rules, psign_rule_nums->num_of_BCsign_rules, psign_rule_nums->num_of_BHsign_rules);
	fprintf(fp, "%d %d %d %d\t\t", psign_rule_nums->num_of_perm_sign_rules, psign_rule_nums->num_of_perm_BCsign_rules, psign_rule_nums->num_of_perm_BHsign_rules, psign_rule_nums->num_of_permFWER_rules);

	//fprintf(fp, "%.3f %.3f %.3f\t", pmatch_stat->sign_match_metrics.drecall, pmatch_stat->sign_match_metrics.dprecision, pmatch_stat->sign_match_metrics.dF1);
	//fprintf(fp, "%.3f %.3f %.3f\t", pmatch_stat->BCsign_match_metrics.drecall, pmatch_stat->BCsign_match_metrics.dprecision, pmatch_stat->BCsign_match_metrics.dF1);
	//fprintf(fp, "%.3f %.3f %.3f\t", pmatch_stat->BHsign_match_metrics.drecall, pmatch_stat->BHsign_match_metrics.dprecision, pmatch_stat->BHsign_match_metrics.dF1);
	//fprintf(fp, "%.3f %.3f %.3f\t", pmatch_stat->perm_sign_match_metrics.drecall, pmatch_stat->perm_sign_match_metrics.dprecision, pmatch_stat->perm_sign_match_metrics.dF1);
	//fprintf(fp, "%.3f %.3f %.3f\t", pmatch_stat->perm_BCsign_match_metrics.drecall, pmatch_stat->perm_BCsign_match_metrics.dprecision, pmatch_stat->perm_BCsign_match_metrics.dF1);
	//fprintf(fp, "%.3f %.3f %.3f\t", pmatch_stat->perm_BHsign_match_metrics.drecall, pmatch_stat->perm_BHsign_match_metrics.dprecision, pmatch_stat->perm_BHsign_match_metrics.dF1);
	//fprintf(fp, "%.3f %.3f %.3f\t", pmatch_stat->perm_FWER_match_metrics.drecall, pmatch_stat->perm_FWER_match_metrics.dprecision, pmatch_stat->perm_FWER_match_metrics.dF1);

	fprintf(fp, "%.3f %.3f %.3f\t", pmatch_stat->sign_match_metrics.drecall, pmatch_stat->sign_match_metrics.dprecision, pmatch_stat->sign_match_metrics.dFDR);
	fprintf(fp, "%.3f %.3f %.3f\t", pmatch_stat->BCsign_match_metrics.drecall, pmatch_stat->BCsign_match_metrics.dprecision, pmatch_stat->BCsign_match_metrics.dFDR);
	fprintf(fp, "%.3f %.3f %.3f\t", pmatch_stat->BHsign_match_metrics.drecall, pmatch_stat->BHsign_match_metrics.dprecision, pmatch_stat->BHsign_match_metrics.dFDR);
	fprintf(fp, "%.3f %.3f %.3f\t", pmatch_stat->perm_sign_match_metrics.drecall, pmatch_stat->perm_sign_match_metrics.dprecision, pmatch_stat->perm_sign_match_metrics.dFDR);
	fprintf(fp, "%.3f %.3f %.3f\t", pmatch_stat->perm_BCsign_match_metrics.drecall, pmatch_stat->perm_BCsign_match_metrics.dprecision, pmatch_stat->perm_BCsign_match_metrics.dFDR);
	fprintf(fp, "%.3f %.3f %.3f\t", pmatch_stat->perm_BHsign_match_metrics.drecall, pmatch_stat->perm_BHsign_match_metrics.dprecision, pmatch_stat->perm_BHsign_match_metrics.dFDR);
	fprintf(fp, "%.3f %.3f %.3f\t", pmatch_stat->perm_FWER_match_metrics.drecall, pmatch_stat->perm_FWER_match_metrics.dprecision, pmatch_stat->perm_FWER_match_metrics.dFDR);

	if(pmatch_stat->sign_match_metrics.num_of_FPs>0)
		fprintf(fp, "%d 1\t", pmatch_stat->sign_match_metrics.num_of_FPs);
	else
		fprintf(fp, "%d 0\t", pmatch_stat->sign_match_metrics.num_of_FPs);

	if(pmatch_stat->BCsign_match_metrics.num_of_FPs>0)
		fprintf(fp, "%d 1\t", pmatch_stat->BCsign_match_metrics.num_of_FPs);
	else
		fprintf(fp, "%d 0\t", pmatch_stat->BCsign_match_metrics.num_of_FPs);

	if(pmatch_stat->BHsign_match_metrics.num_of_FPs>0)
		fprintf(fp, "%d 1\t", pmatch_stat->BHsign_match_metrics.num_of_FPs);
	else
		fprintf(fp, "%d 0\t", pmatch_stat->BHsign_match_metrics.num_of_FPs);

	if(pmatch_stat->perm_sign_match_metrics.num_of_FPs>0)
		fprintf(fp, "%d 1\t", pmatch_stat->perm_sign_match_metrics.num_of_FPs);
	else
		fprintf(fp, "%d 0\t", pmatch_stat->perm_sign_match_metrics.num_of_FPs);

	if(pmatch_stat->perm_BCsign_match_metrics.num_of_FPs>0)
		fprintf(fp, "%d 1\t", pmatch_stat->perm_BCsign_match_metrics.num_of_FPs);
	else
		fprintf(fp, "%d 0\t", pmatch_stat->perm_BCsign_match_metrics.num_of_FPs);

	if(pmatch_stat->perm_BHsign_match_metrics.num_of_FPs>0)
		fprintf(fp, "%d 1\t", pmatch_stat->perm_BHsign_match_metrics.num_of_FPs);
	else
		fprintf(fp, "%d 0\t", pmatch_stat->perm_BHsign_match_metrics.num_of_FPs);

	if(pmatch_stat->perm_FWER_match_metrics.num_of_FPs>0)
		fprintf(fp, "%d 1\t", pmatch_stat->perm_FWER_match_metrics.num_of_FPs);
	else
		fprintf(fp, "%d 0\t", pmatch_stat->perm_FWER_match_metrics.num_of_FPs);

	fprintf(fp, "\n");

	fclose(fp);
}


// Reset recall, precision, F1, false-positive count and FDR to zero in each of
// the seven metric records held by pmatch_stat.
void InitMatchMetrics(MATCH_STAT *pmatch_stat)
{
	const int NUM_METHODS = 7;
	MATCH_METRICS *pall_metrics[NUM_METHODS] = {
		&pmatch_stat->sign_match_metrics,
		&pmatch_stat->BCsign_match_metrics,
		&pmatch_stat->BHsign_match_metrics,
		&pmatch_stat->perm_sign_match_metrics,
		&pmatch_stat->perm_BCsign_match_metrics,
		&pmatch_stat->perm_BHsign_match_metrics,
		&pmatch_stat->perm_FWER_match_metrics
	};
	int k;

	for(k=0;k<NUM_METHODS;k++)
	{
		MATCH_METRICS *pmetrics = pall_metrics[k];

		pmetrics->drecall = 0;
		pmetrics->dprecision = 0;
		pmetrics->dF1 = 0;
		pmetrics->num_of_FPs = 0;
		pmetrics->dFDR = 0;
	}
}

// Match every mined rule against every true rule and fill pmatch_stat.
//
//   pmined_rules, num_of_mined_rules : mined rules, in the order the
//                                      significant-rule counts refer to
//   ptrue_rules, num_of_true_rules   : embedded rules
//   nmatch_method                    : MATCH_PROBABILITY uses MatchOneRule_prob;
//                                      anything else uses MatchOneRule_set, with
//                                      MATCH_BINARY / MATCH_EXACT_BINARY
//                                      snapping the score to 0 or 1
//   dmatch_thres                     : scores below this are ignored
//   psign_rule_nums                  : number of leading mined rules kept by
//                                      each correction method
//   ptgt_values                      : target value per transaction
//   pmatch_stat                      : (out) metrics per correction method
//
// Metrics for a method are taken at the moment the number of processed mined
// rules equals that method's rule count, so they cover exactly that prefix of
// the mined rules. A method whose count is 0 or exceeds num_of_mined_rules
// keeps the zeroed metrics written by InitMatchMetrics.
// The global gpdfactorials table is allocated for the duration of the call and
// released before returning.
void MatchRules(ASSOCRULE *pmined_rules, int num_of_mined_rules, ASSOCRULE *ptrue_rules, int num_of_true_rules, int nmatch_method, double dmatch_thres, SIGN_RULE_NUM *psign_rule_nums, int* ptgt_values, MATCH_STAT *pmatch_stat)
{
	const int NUM_CHECKPOINTS = 7;
	int nmined, ntrue, k;
	double *pbest_mined_score, *pbest_true_score, dscore;

	gpdfactorials = new double[gndb_size+1];
	InitFactorials(gpdfactorials, gndb_size);

	InitMatchMetrics(pmatch_stat);

	pbest_mined_score = new double[num_of_mined_rules];
	for(k=0;k<num_of_mined_rules;k++)
		pbest_mined_score[k] = 0;
	pbest_true_score = new double[num_of_true_rules];
	for(k=0;k<num_of_true_rules;k++)
		pbest_true_score[k] = 0;

	// prefix length at which each method's metrics are taken, and where they go
	const int ncheckpoint_sizes[NUM_CHECKPOINTS] = {
		psign_rule_nums->num_of_sign_rules,
		psign_rule_nums->num_of_BCsign_rules,
		psign_rule_nums->num_of_BHsign_rules,
		psign_rule_nums->num_of_perm_sign_rules,
		psign_rule_nums->num_of_perm_BCsign_rules,
		psign_rule_nums->num_of_perm_BHsign_rules,
		psign_rule_nums->num_of_permFWER_rules
	};
	MATCH_METRICS *pcheckpoint_metrics[NUM_CHECKPOINTS] = {
		&pmatch_stat->sign_match_metrics,
		&pmatch_stat->BCsign_match_metrics,
		&pmatch_stat->BHsign_match_metrics,
		&pmatch_stat->perm_sign_match_metrics,
		&pmatch_stat->perm_BCsign_match_metrics,
		&pmatch_stat->perm_BHsign_match_metrics,
		&pmatch_stat->perm_FWER_match_metrics
	};

	for(nmined=0;nmined<num_of_mined_rules;nmined++)
	{
		ASSOCRULE *pmined = &pmined_rules[nmined];

		for(ntrue=0;ntrue<num_of_true_rules;ntrue++)
		{
			ASSOCRULE *ptrue = &ptrue_rules[ntrue];

			if(pmined->nclass_no!=ptrue->nclass_no)
				continue;

			if(nmatch_method==MATCH_PROBABILITY)
				dscore = MatchOneRule_prob(pmined, ptrue, ptgt_values);
			else
			{
				dscore = MatchOneRule_set(pmined, ptrue, ptgt_values);
				if(nmatch_method==MATCH_BINARY)
				{
					// any overlap counts as a full match
					if(dscore>0)
						dscore = 1;
				}
				else if(nmatch_method==MATCH_EXACT_BINARY)
				{
					// only a perfect match counts
					if(dscore<1)
						dscore = 0;
				}
			}

			if(dscore>=dmatch_thres)
			{
				// keep the best score seen so far for both rules
				if(pbest_mined_score[nmined]<dscore)
					pbest_mined_score[nmined] = dscore;
				if(pbest_true_score[ntrue]<dscore)
					pbest_true_score[ntrue] = dscore;
			}
		}

		const int nprocessed = nmined+1;
		for(k=0;k<NUM_CHECKPOINTS;k++)
		{
			if(nprocessed==ncheckpoint_sizes[k])
				GetMatchMetrics(pbest_mined_score, nprocessed, pbest_true_score, num_of_true_rules, pcheckpoint_metrics[k]);
		}
	}

	delete []gpdfactorials;
	delete []pbest_mined_score;
	delete []pbest_true_score;
}

// Calculate the p-value pmined_rule would have if ptrue_rule did not exist.
//
//   pmined_rule      : rule produced by the miner
//   ptrue_rule       : embedded rule
//   ptgt_values      : target value per transaction, indexed by tid
//   dtgt_global_conf : confidence assumed for the transactions covered by the
//                      true rule once its effect is removed
//   bmatched         : (out) true when the mined rule has the same pattern as
//                      the true rule, or one pattern contains the other and
//                      the supports are equal, or the two tid lists coincide
//
// Returns 0 when the classes differ or the rules share no transaction, 1 when
// the mined rule is matched or covers only transactions of the true rule, and
// otherwise the CalcTwoTailedFisherPvalue result obtained after replacing the
// target support contributed by the true rule with its expected value under
// dtgt_global_conf.
double MatchOneRule_pvalue(ASSOCRULE *pmined_rule, ASSOCRULE *ptrue_rule, int *ptgt_values, double dtgt_global_conf, bool &bmatched)
{
	int ncommon_items, nshared_tids, nshared_tgt_tids, nadjusted_tgt_sup;

	bmatched = false;

	if(ptrue_rule->nclass_no!=pmined_rule->nclass_no)
		return 0;

	ncommon_items = get_intersection_size(pmined_rule->npat_len, pmined_rule->pattern, ptrue_rule->npat_len, ptrue_rule->pattern);

	if(ncommon_items==pmined_rule->npat_len)
	{
		// mined pattern is contained in the true pattern
		if(ncommon_items==ptrue_rule->npat_len || pmined_rule->nsup==ptrue_rule->nsup)
		{
			// same pattern, or same support
			bmatched = true;
			return 1;
		}

		// proper subset: discount the true rule's contribution
		nadjusted_tgt_sup = (pmined_rule->ntgt_sup-ptrue_rule->ntgt_sup)+(int)(ptrue_rule->nsup*dtgt_global_conf);
		return CalcTwoTailedFisherPvalue(gndb_size, (int)(gndb_size*dtgt_global_conf), pmined_rule->nsup, nadjusted_tgt_sup);
	}

	if(ncommon_items==ptrue_rule->npat_len)
	{
		// mined pattern is a superset of the true pattern
		bmatched = (pmined_rule->nsup==ptrue_rule->nsup);
		return 1;
	}

	// no pattern containment: compare the supporting transactions
	nshared_tids = GetCmnTgtSup(pmined_rule->nsup, pmined_rule->ptid_list, ptrue_rule->nsup, ptrue_rule->ptid_list, ptgt_values, ptrue_rule->nclass_no, nshared_tgt_tids);

	if(nshared_tids==0)
		return 0;

	if(nshared_tids==ptrue_rule->nsup && nshared_tids==pmined_rule->nsup)
	{
		// T(ptrue_rule) is the same as T(pmined_rule)
		bmatched = true;
		return 1;
	}

	if(nshared_tids!=ptrue_rule->nsup && nshared_tids==pmined_rule->nsup)
	{
		// T(pmined_rule) is a subset of T(ptrue_rule)
		return 1;
	}

	// T(ptrue_rule) is a proper subset of T(pmined_rule), or the two only
	// partially overlap: both cases discount the shared transactions
	nadjusted_tgt_sup = (pmined_rule->ntgt_sup-nshared_tgt_tids)+(int)(nshared_tids*dtgt_global_conf);
	return CalcTwoTailedFisherPvalue(gndb_size, (int)(gndb_size*dtgt_global_conf), pmined_rule->nsup, nadjusted_tgt_sup);
}


// Derive match metrics from adjusted p-values of the mined rules.
//
//   pmined_rule_scores : adjusted p-value of each mined rule
//   num_of_mined_rules : number of entries of pmined_rule_scores to use
//   nmatch_method      : MATCH_PVALUE_FP selects the 1/(nFP+1) precision
//   dpvalue_thres      : a rule whose value is <= this is a false positive
//   btrue_rule_matched : whether some mined rule matched the true rule
//   pmatch_metrics     : (out) receives num_of_FPs, dFDR, drecall, dprecision, dF1
//
// Recall is 1 or 0 according to btrue_rule_matched. For MATCH_PVALUE_FP the
// precision is 1/(nFP+1) when the true rule was matched and 0 otherwise; for
// other methods it is the fraction of mined rules that are not false positives.
void GetMatchMetrics(double *pmined_rule_scores, int num_of_mined_rules, int nmatch_method, double dpvalue_thres, bool btrue_rule_matched, MATCH_METRICS *pmatch_metrics)
{
	int k;
	int nfalse_pos = 0;

	for(k=0;k<num_of_mined_rules;k++)
	{
		if(pmined_rule_scores[k]<=dpvalue_thres)
			nfalse_pos++;
	}

	pmatch_metrics->num_of_FPs = nfalse_pos;
	pmatch_metrics->dFDR = 0;
	if(num_of_mined_rules>0)
		pmatch_metrics->dFDR = (double)nfalse_pos/num_of_mined_rules;

	pmatch_metrics->drecall = btrue_rule_matched ? 1 : 0;

	if(nmatch_method==MATCH_PVALUE_FP)
		pmatch_metrics->dprecision = btrue_rule_matched ? (double)1/(nfalse_pos+1) : 0;
	else if(num_of_mined_rules>0)
		pmatch_metrics->dprecision = (double)(num_of_mined_rules-nfalse_pos)/num_of_mined_rules;
	else
		pmatch_metrics->dprecision = 0;

	// harmonic mean, defined as 0 as soon as either term is 0
	if(pmatch_metrics->drecall!=0.0 && pmatch_metrics->dprecision!=0.0)
		pmatch_metrics->dF1 = 2*pmatch_metrics->drecall*pmatch_metrics->dprecision/(pmatch_metrics->dprecision+pmatch_metrics->drecall);
	else
		pmatch_metrics->dF1 = 0;
}

// Evaluate the mined rules against the first true rule using adjusted p-values.
//
//   pmined_rules, num_of_mined_rules : mined rules, in the order the
//                                      significant-rule counts refer to
//   ptrue_rules                      : embedded rules; only element 0 is used
//   nmatch_method                    : forwarded to GetMatchMetrics
//   psign_rule_nums                  : rule counts and p-value thresholds per
//                                      correction method
//   ptgt_values                      : target value per transaction
//   pmatch_stat                      : (out) metrics per correction method
//
// Each mined rule gets the value returned by MatchOneRule_pvalue, called with
// a global confidence of 1/gnum_of_classes. Metrics for a method are taken
// when the number of processed rules equals that method's rule count. Only
// five methods are evaluated here; the perm_sign and perm_BCsign metrics keep
// the zeroes written by InitMatchMetrics.
// The global gpdfactorials table is allocated for the duration of the call and
// released before returning.
void MatchRules_pvalue(ASSOCRULE *pmined_rules, int num_of_mined_rules, ASSOCRULE *ptrue_rules, int nmatch_method, SIGN_RULE_NUM *psign_rule_nums, int* ptgt_values, MATCH_STAT *pmatch_stat)
{
	const int NUM_CHECKPOINTS = 5;
	int nrule, k;
	double *padjusted_pvalues;
	bool bthis_rule_matched;
	bool bany_rule_matched = false;

	gpdfactorials = new double[gndb_size+1];
	InitFactorials(gpdfactorials, gndb_size);

	InitMatchMetrics(pmatch_stat);

	padjusted_pvalues = new double[num_of_mined_rules];
	for(k=0;k<num_of_mined_rules;k++)
		padjusted_pvalues[k] = 0;

	// per method: prefix length, false-positive threshold, destination record
	const int ncheckpoint_sizes[NUM_CHECKPOINTS] = {
		psign_rule_nums->num_of_sign_rules,
		psign_rule_nums->num_of_BCsign_rules,
		psign_rule_nums->num_of_BHsign_rules,
		psign_rule_nums->num_of_perm_BHsign_rules,
		psign_rule_nums->num_of_permFWER_rules
	};
	const double dcheckpoint_thres[NUM_CHECKPOINTS] = {
		psign_rule_nums->dpvalue_thres,
		psign_rule_nums->dBC_pvalue_thres,
		psign_rule_nums->dBH_pvalue_thres,
		psign_rule_nums->dpermBH_pvalue_thres,
		psign_rule_nums->dperm_FWER_thres
	};
	MATCH_METRICS *pcheckpoint_metrics[NUM_CHECKPOINTS] = {
		&pmatch_stat->sign_match_metrics,
		&pmatch_stat->BCsign_match_metrics,
		&pmatch_stat->BHsign_match_metrics,
		&pmatch_stat->perm_BHsign_match_metrics,
		&pmatch_stat->perm_FWER_match_metrics
	};

	for(nrule=0;nrule<num_of_mined_rules;nrule++)
	{
		padjusted_pvalues[nrule] = MatchOneRule_pvalue(&pmined_rules[nrule], &ptrue_rules[0], ptgt_values, (double)1/gnum_of_classes, bthis_rule_matched);
		if(bthis_rule_matched)
			bany_rule_matched = true;

		const int nprocessed = nrule+1;
		for(k=0;k<NUM_CHECKPOINTS;k++)
		{
			if(nprocessed==ncheckpoint_sizes[k])
				GetMatchMetrics(padjusted_pvalues, nprocessed, nmatch_method, dcheckpoint_thres[k], bany_rule_matched, pcheckpoint_metrics[k]);
		}
	}

	delete []gpdfactorials;
	delete []padjusted_pvalues;

}


//for datasets with embedded rules
void MatchRules(char* szrule_output_name, char* szoutput_name, char* szdataset_name, int nmatch_method, double dmatch_thres, char* szoutput_filename)
{
	ASSOCRULE *pmined_rules, *ptrue_rules;
	int *pmined_itemset_buf, *pmined_tidlist_buf, *ptrue_itemset_buf, *ptrue_tidlist_buf;
	int num_of_true_rules, num_of_mined_rules, nmax_true_rule_len, nmax_mined_rule_len;
	int *ptgt_values, i, ntgt_class;
	SIGN_RULE_NUM thesign_rule_nums;
	MATCH_STAT the_match_stat;
	map<string, int> tgtvalue_map;
	map<string, int>::iterator map_it;

	//load embedded rules
	num_of_true_rules = LoadTrueRules(szdataset_name, ptrue_rules, ptrue_itemset_buf, ptrue_tidlist_buf, nmax_true_rule_len);
	if(gnum_of_classes>2)
	{
		LoadTgtValueMap(szoutput_name, &tgtvalue_map);
		map_it = tgtvalue_map.find(gszembed_target_value);
		if(map_it==tgtvalue_map.end())
		{
			ntgt_class = -1;
			printf("Error: the class of the embedded rule is not find in target value map\n");
		}
		else
			ntgt_class = map_it->second;
	}
	else
		ntgt_class = 1;
	for(i=0;i<num_of_true_rules;i++)
		ptrue_rules[i].nclass_no = ntgt_class;

	//---------------load generated rules ----------------------------------------
	//get dataset size and #positives in the dataset
	ReadTreeStatis(szoutput_name);

	//load the target values 
	ptgt_values = new int[gndb_size];
	LoadTgtValues(szoutput_name, ptgt_values);

	//load generated rules
	num_of_mined_rules = LoadMinedRules(szrule_output_name, pmined_rules, pmined_itemset_buf, pmined_tidlist_buf, nmax_mined_rule_len);
	//load the tidlist of generated rules
	LoadMinedTidList(szoutput_name, pmined_rules, num_of_mined_rules);
	//-----------------------------------------------------------------------------

	//load p-value thresholds and the number of signficant rules generated by different methods
	LoadSignRuleNums(szrule_output_name, &thesign_rule_nums);


	//match mined rules with embedded rules
	if(nmatch_method==MATCH_PVALUE || nmatch_method==MATCH_PVALUE_FP)
		MatchRules_pvalue(pmined_rules, num_of_mined_rules, ptrue_rules, nmatch_method, &thesign_rule_nums, ptgt_values, &the_match_stat);
	else
		MatchRules(pmined_rules, num_of_mined_rules, ptrue_rules, num_of_true_rules, nmatch_method, dmatch_thres, &thesign_rule_nums, ptgt_values, &the_match_stat);


	delete []ptgt_values;

	delete []ptrue_rules;
	delete []ptrue_itemset_buf;
	delete []ptrue_tidlist_buf;
	delete []pmined_rules;
	delete []pmined_itemset_buf;
	delete []pmined_tidlist_buf;

	OutputMatchStat(szdataset_name, szrule_output_name, dmatch_thres, &thesign_rule_nums, &the_match_stat, szoutput_filename);
}



// Build "<szprefix><szsuffix>" in szbuf without writing past nbuf_size bytes.
// Returns false, after printing an error, if the name does not fit.
static bool FormatMatchFileName(char *szbuf, size_t nbuf_size, const char *szprefix, const char *szsuffix)
{
	int nwritten = snprintf(szbuf, nbuf_size, "%s%s", szprefix, szsuffix);

	if(nwritten<0 || (size_t)nwritten>=nbuf_size)
	{
		printf("Error: file name %s%s is too long\n", szprefix, szsuffix);
		return false;
	}
	return true;
}

// Top-level driver: evaluate the sorted rule file of one mining run and append
// the results to summary files named after szsum_prefix.
//
//   szdataset_name : dataset the rules were mined from
//   num_of_rules   : > 0 for a dataset with embedded rules, otherwise the
//                    dataset is treated as random (no embedded rules)
//   szoutput_name  : base name of the mining output; the rule file read is
//                    "<szoutput_name>.sorted"
//   dmatch_thres   : match threshold forwarded to the matching routines
//   szsum_prefix   : prefix of the summary files
//
// With embedded rules three summaries are produced (MATCH_PVALUE,
// MATCH_PVALUE_FP and MATCH_SET); for random datasets a single
// "<prefix>.rand.all.sum.txt" summary is produced. Evaluation of the ".rep",
// ".supadj" and ".supadj.rep" rule files and of the MATCH_PROBABILITY,
// MATCH_BINARY and MATCH_EXACT_BINARY methods is currently disabled.
//
// File names are built with a bounded formatter: a name that does not fit in
// its buffer is reported and the corresponding evaluation is skipped instead
// of overflowing the stack buffer.
void MatchRules(char* szdataset_name, int num_of_rules, char* szoutput_name, double dmatch_thres, char* szsum_prefix)
{
	char szrule_output_name[200];
	char szsum_filename[200];

	if(!FormatMatchFileName(szrule_output_name, sizeof(szrule_output_name), szoutput_name, ".sorted"))
		return;

	if(num_of_rules>0)
	{
		if(FormatMatchFileName(szsum_filename, sizeof(szsum_filename), szsum_prefix, ".all.pvalue.sum.txt"))
			MatchRules(szrule_output_name, szoutput_name, szdataset_name, MATCH_PVALUE, dmatch_thres, szsum_filename);
		if(FormatMatchFileName(szsum_filename, sizeof(szsum_filename), szsum_prefix, ".all.pvalue-FP.sum.txt"))
			MatchRules(szrule_output_name, szoutput_name, szdataset_name, MATCH_PVALUE_FP, dmatch_thres, szsum_filename);
		if(FormatMatchFileName(szsum_filename, sizeof(szsum_filename), szsum_prefix, ".all.sets.sum.txt"))
			MatchRules(szrule_output_name, szoutput_name, szdataset_name, MATCH_SET, dmatch_thres, szsum_filename);
	}
	else
	{
		if(FormatMatchFileName(szsum_filename, sizeof(szsum_filename), szsum_prefix, ".rand.all.sum.txt"))
			MatchRules(szrule_output_name, szdataset_name, szsum_filename);
	}
}



// ======= for random datasets ===========
void OutputMatchStat(char* szdataset_name, char *szoutput_name, SIGN_RULE_NUM *psign_rule_nums, char *szoutput_filename)
{
	FILE *fp;

	fp = fopen(szoutput_filename, "a+");
	if(fp==NULL)
	{
		printf("Error: cannot open file %s for appending\n", szoutput_filename);
		return;
	}

	fprintf(fp, "%s %s\t", szdataset_name, szoutput_name);
	fprintf(fp, "%d %d %d %d %.3f\t", gndataset_instant_num, gndataset_attr_num, gndataset_rule_num, gndataset_rule_sup, gddataset_rule_conf);
	fprintf(fp, "%d %.3f %d %d\t", gnmine_min_sup, gdmine_min_conf, gnum_of_tests, gnum_of_holdout_exp_tests);
	if(gnum_of_tests>0)
		fprintf(fp, "%.3E %.3E %.3E %.3E %.3E\t", gdmax_pvalue, gdmax_pvalue/gnum_of_tests, psign_rule_nums->dBH_pvalue_thres, gdpermFWER_pvalue_thres, psign_rule_nums->dpermBH_pvalue_thres);
	else
		fprintf(fp, "%.3E %.3E %.3E %.3E %.3E\t", gdmax_pvalue, 0, psign_rule_nums->dBH_pvalue_thres, gdpermFWER_pvalue_thres, psign_rule_nums->dpermBH_pvalue_thres);
	fprintf(fp, "%d %d %d  ", psign_rule_nums->num_of_sign_rules, psign_rule_nums->num_of_BCsign_rules, psign_rule_nums->num_of_BHsign_rules);
	fprintf(fp, "%d %d %d %d\t\t", psign_rule_nums->num_of_perm_sign_rules, psign_rule_nums->num_of_perm_BCsign_rules, psign_rule_nums->num_of_perm_BHsign_rules, psign_rule_nums->num_of_permFWER_rules);

	if(psign_rule_nums->num_of_sign_rules>0)
		fprintf(fp, "1 ");
	else
		fprintf(fp, "0 ");

	if(psign_rule_nums->num_of_BCsign_rules>0)
		fprintf(fp, "1 ");
	else
		fprintf(fp, "0 ");

	if(psign_rule_nums->num_of_BHsign_rules>0)
		fprintf(fp, "1 ");
	else
		fprintf(fp, "0 ");

	if(psign_rule_nums->num_of_perm_sign_rules>0)
		fprintf(fp, "1 ");
	else
		fprintf(fp, "0 ");

	if(psign_rule_nums->num_of_perm_BCsign_rules>0)
		fprintf(fp, "1 ");
	else
		fprintf(fp, "0 ");

	if(psign_rule_nums->num_of_perm_BHsign_rules>0)
		fprintf(fp, "1 ");
	else
		fprintf(fp, "0 ");

	if(psign_rule_nums->num_of_permFWER_rules>0)
		fprintf(fp, "1 ");
	else
		fprintf(fp, "0 ");
	fprintf(fp, "\n");

	fclose(fp);
}


//for random datasets without embedded rules
void MatchRules(char* szrule_output_name, char* szdataset_name, char* szoutput_filename)
{
	SIGN_RULE_NUM thesign_rule_nums;

	//load p-value thresholds and the number of signficant rules generated by different methods
	LoadSignRuleNums(szrule_output_name, &thesign_rule_nums);

	OutputMatchStat(szdataset_name, szrule_output_name, &thesign_rule_nums, szoutput_filename);

}

