#include <stdlib.h>
#include <math.h>
#include <time.h>
#include <sys/timeb.h>

#include "global.h"
#include "cfptree_ehta.h"
#include "pvalue.h"
// File-scope instance of the rule-generation tree class whose methods are
// defined below.
CTREE_EHTA gocfptree_ehta;

// Global counters. In the visible part of this file only gnum_of_tests,
// gnum_of_output_rules and gntotal_context_len are touched: they are reset in
// GenAssocRules() and incremented in test_onerule(). The remaining counters
// are presumably maintained by other translation units -- TODO confirm.
int gnum_of_contexts;
int gnum_of_tests;
int gnum_of_sign_hypotheses;
int gnum_of_total_sign_hypotheses;
int gntotal_context_len;
int gntotal_diff_item_num;
int gnum_of_BCsign_hypotheses;
int gnum_of_BHsign_hypotheses;

// p-value thresholds; not read or written in the visible part of this file.
// NOTE(review): names suggest Bonferroni-style and permutation-FWER
// thresholds -- confirm against the code that sets them.
double gdBC_pvalue_thres;
double gdpermFWER_pvalue_thres;

// Per-category tallies of significant rules (not used in the visible code).
SIGN_RULE_NUM gosign_rule_nums;

SIGN_RULE_NUM gorep_item_sign_rule_nums;
SIGN_RULE_NUM gorep_tidlist_sign_rule_nums;
SIGN_RULE_NUM gorep_collective_sign_rule_nums;

SIGN_RULE_NUM gosupadj_sign_rule_nums;
SIGN_RULE_NUM gosupadj_rep_sign_rule_nums;

// Number of rules written by OutputOneRule() from test_onerule().
int gnum_of_output_rules;

bool *gphas_paradox_flags;

int gntotal_diff_item_pairs;

CONTRIBUTION *gpattr_contributions;
int gnum_of_analyze_attrs;
CONTRIBUTION *gpitem_contributions;
int gnum_of_analyze_items;

// gpsup_testnums is allocated in GenAssocRules() with gndb_size slots and
// indexed by pattern support in test_onerule() (one increment per tested
// pattern). gplen_testnums is not used in the visible code.
int *gpsup_testnums;
int *gplen_testnums;

int *gptemp_tgt_sup_array;


void CTREE_EHTA::GenAssocRules(char* szoutput_name)
{
	char szattrvalue_filename[200], szcfp_filename[200], szcfp_stat_filename[200], sztidlist_filename[200], sztidlist_dir_filename[200];
	char szrule_filename[200], szpvalue_filename[200];
	int i;
	struct timeb start, end;
	
	ftime(&start);

	OpenLogFile();

	gdused_mem_size = 0;
	gdmax_used_mem_size = 0;
	gnum_of_tests = 0;
	gnum_of_output_rules = 0;
	gntotal_context_len = 0;


	sprintf(szcfp_filename, "%s.cfptree", szoutput_name);
	mfpcfp_file = fopen(szcfp_filename, "rb");
	if(mfpcfp_file==NULL)
	{
		printf("Error: cannot open file %s for read\n", szcfp_filename);
		return;
	}

	sprintf(szrule_filename, "%s.rules.txt", szoutput_name);
	mfptext = fopen(szrule_filename, "wt");
	if(mfptext==NULL)
	{
		printf("Error: cannot open file %s for write\n", szrule_filename);
		return;
	}
	sprintf(szpvalue_filename, "%s.pvalues", szoutput_name);
	mfppvalue = fopen(szpvalue_filename, "wt");
	if(mfppvalue==NULL)
	{
		printf("Error: cannot open file %s for write\n", szpvalue_filename);
		return;
	}


	sprintf(szattrvalue_filename, "%s.attrvalue2item.txt", szoutput_name);
	LoadAttrNValues(szattrvalue_filename);

	sprintf(szcfp_stat_filename, "%s.stat", szcfp_filename);
	ReadTreeStatis(szcfp_stat_filename);

	if(gntgt_attr_type==NOMINAL && gsztarget_value[0]==0 && gnum_of_tgt_values>2 && gncompare_mode==PAIR_WISE)
		gbrule_multiclass_pairwise = true;
	else
		gbrule_multiclass_pairwise = false;


	//load cfp-tree or part of cfp-tree into memory
	if(gntree_size<=TREE_TRAVERSE_BUF_SIZE)
		mnbuf_num_of_pages = (gntree_size+gntree_page_size-1)/gntree_page_size;
	else
		mnbuf_num_of_pages = TREE_TRAVERSE_BUF_SIZE/gntree_page_size;
	if(mnbuf_num_of_pages==1)
		mnbuf_num_of_pages++;
	mnbuf_size = mnbuf_num_of_pages*gntree_page_size;
	mptree_buffer = NewCFPBufPointers(mnbuf_num_of_pages);
	mninbuf_start_pos = 0;
	mninbuf_end_pos = 0;
	for(i=0;i<mnbuf_num_of_pages;i++)
	{
		mptree_buffer[i] = NewCFPBufPage(gntree_page_size);
		fread(mptree_buffer[i], sizeof(char), gntree_page_size, mfpcfp_file);
		mninbuf_end_pos += gntree_page_size;
	}

	mpactive_stack = NewActiveStack(gnmax_tree_depth);
	mnactive_top = 0;
	mnmin_inmem_level = -1;


	mppattern = NewIntArray(gnum_of_attrs);
	mnpat_len = 0;
	gndepth = 0;
	mopat_info.nsupport = gndb_size;
	mopat_info.ptgt_stat = NewCharArray(gntgt_stat_size);
	memcpy(mopat_info.ptgt_stat, gptgt_stat_array, gntgt_stat_size);

	morule.dadjusted_pvalue = 0;
	morule.dcond_pvalue = 0;
	morule.pattern = mppattern;
	morule.ppat_info = &mopat_info;

	//open tid list file and load the disk position of tid lists
	if(gntgt_attr_type==CONTINUOUS && gnmin_sup<CLT_THRESHOLD)
	{
		gptgt_values = NewDoubleArray(gndb_size);
		LoadTransTgtValues(szoutput_name, gptgt_values);

		gpgroup_tgt_values = new GROUP_TGT_VALUE[gndb_size];
		IncMemSize(sizeof(GROUP_TGT_VALUE)*gndb_size);

		sprintf(sztidlist_filename, "%s.tidlist", szoutput_name);
		mfp_tidlist = fopen(sztidlist_filename, "rb");
		if(mfp_tidlist==NULL)
		{
			printf("Error: cannot open file %s for read\n", sztidlist_filename);
			return;
		}

		sprintf(sztidlist_dir_filename, "%s.tidlist.dir", szoutput_name);
		int ntotal_entries;
		ntotal_entries = LoadTidListPos(sztidlist_dir_filename, mptidlist_dir_nodes, mnmax_tidlist_len);
		if(ntotal_entries!=gnum_of_entries)
			printf("Error: inconsistent number of entries\n");

		mptid_list = NewIntArray(mnmax_tidlist_len);
	}

	gpsup_testnums = new int[gndb_size];
	memset(gpsup_testnums, 0, sizeof(int)*gndb_size);

	if(gntgt_attr_type==NOMINAL && (gntest_statisitic_method==FISHER_EXACT_TEST || gbrule_multiclass_pairwise))
		InitFactorials(gpdfactorials, gndb_size);

	gen_rules(0, -1, gndb_size, true, gdmax_pvalue);

	if(gntgt_attr_type==NOMINAL && (gntest_statisitic_method==FISHER_EXACT_TEST || gbrule_multiclass_pairwise))
		DelFactorials(gpdfactorials, gndb_size);

	if(gntgt_attr_type==CONTINUOUS && gnmin_sup<CLT_THRESHOLD)
	{
		DelDoubleArray(gptgt_values, gndb_size);
		delete []gpgroup_tgt_values;
		DecMemSize(sizeof(GROUP_TGT_VALUE)*gndb_size);

		fclose(mfp_tidlist);
		delete []mptidlist_dir_nodes;
		DecMemSize(sizeof(PAT_DIR_NODE)*gnum_of_entries);
		DelIntArray(mptid_list, mnmax_tidlist_len);
	}

	DelCharArray(mopat_info.ptgt_stat, gntgt_stat_size);
	DelIntArray(mppattern, gnum_of_attrs);

	DelActiveStack(mpactive_stack, gnmax_tree_depth);

	for(i=0;i<mnbuf_num_of_pages;i++)
	{
		if(mptree_buffer[i]!=NULL)
			DelCFPBufPage(mptree_buffer[i], gntree_page_size);
	}
	DelCFPBufPointers(mptree_buffer, mnbuf_num_of_pages);

	DelCharArray(gptgt_stat_array, gntgt_stat_size);

	delete []gpAttributes;
	DecMemSize(sizeof(ATTRIBUTE)*gnum_of_attrs);
	DelCharArray(gszattr_name_buf, gnattr_name_buf_size);
	delete []gpAttrValues;
	DecMemSize(sizeof(ATTR_VALUE)*gnum_of_items);
	DelCharArray(gszattr_value_buf, gnattr_value_buf_size);

	fclose(mfpcfp_file);
	fclose(mfptext);
	fclose(mfppvalue);

	OutputRuleSummary(szoutput_name);

	ftime(&end);
	gdgen_rule_time = end.time-start.time+(double)(end.millitm-start.millitm)/1000;
	gdgenrule_max_mem_size = gdmax_used_mem_size;

	printf("Time for rule generation: %.3f\n", gdgen_rule_time);
	printf("#tests: %d\n", gnum_of_tests);
	printf("#Rules at %.2E level: %d\n", gdmax_pvalue, gnum_of_output_rules);
	//printf("#local Rules at %.2E level: %d\n", gdmax_pvalue, gnum_of_locsign_rules);
	printf("\n");

	CloseLogFile();

}

// Recursively traverses the buffered CFP-tree starting at the node stored at
// byte offset ndisk_pos and tests the patterns it represents.
//
//   ndisk_pos            byte offset of the node in the tree file
//   nparent_entry_no     preorder number of the entry that leads to this node
//   nparent_sup          support of that parent entry
//   bparent_is_singleton true when the parent was reached through a
//                        single-entry node (or is the root)
//   dmax_pvalue          p-value cut-off forwarded to test_onerule()
//
// A node starts with an int entry count. A count > 1 is followed by that many
// ENTRY records and then one target-statistics block per entry. A count of 1
// or a negative count describes a single entry carrying |count| items; the
// item array is read starting sizeof(ENTRY) bytes from the node start, which
// presumably overlays the entry's trailing item field -- TODO confirm against
// the ENTRY definition.
void CTREE_EHTA::gen_rules(int ndisk_pos, int nparent_entry_no, int nparent_sup, bool bparent_is_singleton, double dmax_pvalue)
{
	int nload_flag, ncur_page_no, ncur_page_pos, num_of_entries;
	int i, *pitems, norig_pat_len;
	ENTRY *pentries;
	char* ptgt_stat_array;

	if(ndisk_pos>=mninbuf_end_pos)
	{
		nload_flag= LoadPages(ndisk_pos, ndisk_pos);
		if(nload_flag==-1)
		{
			// The requested page is not in the buffer; reading on would
			// interpret unrelated bytes as a tree node.
			printf("Error: the data cannot be loaded into buffer\n");
			return;
		}
	}

	ncur_page_no = ndisk_pos/gntree_page_size%mnbuf_num_of_pages;
	ncur_page_pos = ndisk_pos%gntree_page_size;
	memcpy(&num_of_entries, &mptree_buffer[ncur_page_no][ncur_page_pos], sizeof(int));

	if(num_of_entries>1)
	{
		// The parent pattern itself is tested before its children are expanded.
		test_onerule(nparent_entry_no, gndepth==1 && !bparent_is_singleton, dmax_pvalue);
		if(gndepth<gnmax_len)
		{
			mpactive_stack[mnactive_top].ndisk_pos = ndisk_pos;
			mpactive_stack[mnactive_top].mem_page_no = ncur_page_no;
			mpactive_stack[mnactive_top].mem_page_pos = ncur_page_pos;
			mpactive_stack[mnactive_top].in_mem = true;
			mpactive_stack[mnactive_top].nprefix_len = mnpat_len;
			mpactive_stack[mnactive_top].ncur_order = 0;
			mpactive_stack[mnactive_top].num_of_entries = num_of_entries;
			mpactive_stack[mnactive_top].pentries = (ENTRY*)&mptree_buffer[ncur_page_no][ncur_page_pos+sizeof(int)];
			mpactive_stack[mnactive_top].ptgt_stat_array = &mptree_buffer[ncur_page_no][ncur_page_pos+sizeof(int)+sizeof(ENTRY)*num_of_entries];
			if(mnmin_inmem_level==-1)
				mnmin_inmem_level = mnactive_top;
			mnactive_top++;

			for(i=0;i<num_of_entries;i++)
			{
				// Re-read the pointer every iteration: the stack frame's
				// pentries/in_mem fields are consulted again after the
				// recursion, so they may have been changed by a nested call.
				pentries = mpactive_stack[mnactive_top-1].pentries;
				if(gpAttrValues[pentries[i].item].bis_context_item)
				{
					mppattern[mnpat_len++] = pentries[i].item;
					gndepth++;
					mopat_info.nsupport = pentries[i].support;
					memcpy(mopat_info.ptgt_stat, &mpactive_stack[mnactive_top-1].ptgt_stat_array[i*gntgt_stat_size], gntgt_stat_size);
					// Expand the entry when it is frequent and either its
					// hash-bitmap flag bit is clear, or it is a depth-1 entry in
					// OUTPUT_CLOSED_N_SINGLETON mode, or every pattern is
					// requested (OUTPUT_ALL). The last clause used to be the bare
					// constant OUTPUT_ALL rather than a comparison with
					// gnoutput_mode.
					if(pentries[i].support>=gnmin_sup && ((pentries[i].hash_bitmap & (1<<CFP_HASH_LEN))==0 ||
						(gndepth==1 && gnoutput_mode==OUTPUT_CLOSED_N_SINGLETON) || gnoutput_mode==OUTPUT_ALL))
					{
						if(pentries[i].child!=0)
							gen_rules(pentries[i].child, pentries[i].npreorder, pentries[i].support, false, dmax_pvalue);
						else
							test_onerule(pentries[i].npreorder, gndepth==1, dmax_pvalue);
					}
					gndepth--;
					mnpat_len--;
				}
				mpactive_stack[mnactive_top-1].ncur_order++; 
			}
			mnactive_top--;
			if(!mpactive_stack[mnactive_top].in_mem)
			{
				DelEntryArray(mpactive_stack[mnactive_top].pentries, num_of_entries);
				DelTgtStatArray(mpactive_stack[mnactive_top].ptgt_stat_array, gntgt_stat_size*num_of_entries);
			}
		}
	}
	else if(num_of_entries==1 || num_of_entries<0)
	{
		pentries = (ENTRY*)&mptree_buffer[ncur_page_no][ncur_page_pos+sizeof(int)];

		// The parent only needs its own test when this single child does not
		// have the same support.
		if(nparent_sup>pentries->support)
			test_onerule(nparent_entry_no, gndepth==1 && !bparent_is_singleton, dmax_pvalue);

		if(pentries->support>=gnmin_sup && (pentries->hash_bitmap & (1<<CFP_HASH_LEN))==0)
		{
			if(num_of_entries==1)
				ptgt_stat_array = &mptree_buffer[ncur_page_no][ncur_page_pos+sizeof(int)+sizeof(ENTRY)];
			else
				ptgt_stat_array = &mptree_buffer[ncur_page_no][ncur_page_pos+sizeof(ENTRY)+sizeof(int)*(-num_of_entries)];

			pitems = (int*)(ENTRY*)&mptree_buffer[ncur_page_no][ncur_page_pos+sizeof(ENTRY)];

			mpactive_stack[mnactive_top].ndisk_pos = ndisk_pos;
			mpactive_stack[mnactive_top].mem_page_no = ncur_page_no;
			mpactive_stack[mnactive_top].mem_page_pos = ncur_page_pos;
			mpactive_stack[mnactive_top].in_mem = true;
			mpactive_stack[mnactive_top].ncur_order = 0;
			mpactive_stack[mnactive_top].nprefix_len = mnpat_len;
			mpactive_stack[mnactive_top].num_of_entries = num_of_entries;
			mpactive_stack[mnactive_top].pentries = pentries;
			mpactive_stack[mnactive_top].ptgt_stat_array = ptgt_stat_array;
			if(mnmin_inmem_level==-1)
				mnmin_inmem_level = mnactive_top;
			mnactive_top++;

			if(num_of_entries<0)
				num_of_entries = -num_of_entries;

			// Append all context items of this node to the current pattern.
			norig_pat_len = mnpat_len;
			for(i=0;i<num_of_entries;i++)
			{
				if(gpAttrValues[pitems[i]].bis_context_item)
					mppattern[mnpat_len++] = pitems[i];
			}
			mopat_info.nsupport = pentries->support;
			memcpy(mopat_info.ptgt_stat, ptgt_stat_array, gntgt_stat_size);

			if(pentries->child!=0)
				gen_rules(pentries->child, pentries->npreorder, pentries->support, true, dmax_pvalue);
			else 
			{
				//nsubtree_boundary = ndisk_pos+sizeof(ENTRY)+sizeof(int)*num_of_entries;
				test_onerule(pentries->npreorder, false, dmax_pvalue);
			}

			mnpat_len = norig_pat_len;
			mnactive_top--;
			if(!mpactive_stack[mnactive_top].in_mem)
			{
				DelEntryArray(mpactive_stack[mnactive_top].pentries, mpactive_stack[mnactive_top].num_of_entries);
				DelTgtStatArray(mpactive_stack[mnactive_top].ptgt_stat_array, gntgt_stat_size);
			}
		}
	}
	else 
		printf("Error with number of entires\n");
}

void CTREE_EHTA::test_onerule(int nentry_no, bool bis_singleton_rule, double dmax_pvalue)
{
	TGT_SUM *ptgt_sum, *pglobal_tgt_sum;
	double deffect_size;
	int i;

	if(mopat_info.nsupport==gndb_size)
		return;

	gpsup_testnums[morule.ppat_info->nsupport]++;

	if(gndepth>gnmax_len)
		printf("Error: the depth of context cannot be larger than %d\n", gnmax_len);

	morule.ppat_info->npreorder = nentry_no;

	if(gntgt_attr_type==CONTINUOUS || gntgt_attr_type==CONTINUOUS_NORMAL)
	{
		ptgt_sum = (TGT_SUM*)mopat_info.ptgt_stat;
		mopat_info.dmean = ptgt_sum->dsum/mopat_info.nsupport;
		mopat_info.dstd_dev = sqrt((ptgt_sum->dsquare_sum-ptgt_sum->dsum*mopat_info.dmean)/(mopat_info.nsupport-1));
		pglobal_tgt_sum = (TGT_SUM*)gptgt_stat_array;
		deffect_size = (pglobal_tgt_sum->dsum-ptgt_sum->dsum)/(gndb_size-mopat_info.nsupport)-mopat_info.dmean;
		if(deffect_size<0)
			deffect_size = -deffect_size;
	}
	else if(gsztarget_value[0]!=0)
	{
		mopat_info.dmean = (double)(*((int*)mopat_info.ptgt_stat))/mopat_info.nsupport;
		if(gneffect_size_method==CONFIDENCE)
		{
			deffect_size = mopat_info.dmean; // -(double)(((int*)gptgt_stat_array)[0]-((int*)mopat_info.ptgt_stat)[0])/(gndb_size-mopat_info.nsupport);
			if(deffect_size<0)
				deffect_size = -deffect_size;				
		}
		else //ODDS_RATIO
			deffect_size = (double)(((int*)mopat_info.ptgt_stat)[0])*(gndb_size-mopat_info.nsupport-(((int*)gptgt_stat_array)[0]-((int*)mopat_info.ptgt_stat)[0]))/((mopat_info.nsupport-((int*)mopat_info.ptgt_stat)[0])*(((int*)gptgt_stat_array)[0]-((int*)mopat_info.ptgt_stat)[0]));
		
	}

	if(gntgt_attr_type==CONTINUOUS && gnmin_sup<CLT_THRESHOLD)
	{
		int nlist_len;

		nlist_len = LoadOneTidList(mfp_tidlist, mptidlist_dir_nodes[nentry_no].ntidlist_pos, mptid_list);
		if(nlist_len!=mopat_info.nsupport)
			printf("Error: inconsistent tid list length\n");
	}

	if(gbrule_multiclass_pairwise)
	{
		int ntgt_sup, nglobal_tgt_sup;

		gnum_of_tests += gnum_of_tgt_values;

		morule.dscore = 0;
		for(i=0;i<gnum_of_tgt_values;i++)
		{
			morule.ntgt_class = i;
			ntgt_sup = ((int*)mopat_info.ptgt_stat)[i];
			nglobal_tgt_sup = ((int*)gptgt_stat_array)[i];

			if(gneffect_size_method==CONFIDENCE)
				deffect_size = (double)ntgt_sup/mopat_info.nsupport;
			else //ODDS_RATIO
				deffect_size = (double)ntgt_sup/(mopat_info.nsupport-ntgt_sup)*(gndb_size-mopat_info.nsupport-(nglobal_tgt_sup-ntgt_sup))/(nglobal_tgt_sup-ntgt_sup);
			morule.dpvalue  = CalcTwoTailedFisherPvalue(mopat_info.nsupport, ntgt_sup, i);
			
			
			OutputOnePvalue(mfppvalue, nentry_no, morule.ppat_info->nsupport, ntgt_sup, morule.dpvalue);

			if((dmax_pvalue>=1 || morule.dpvalue<=dmax_pvalue) && deffect_size>=gdmin_diff)
			{
				morule.npat_len = mnpat_len;

				OutputOneRule(mfptext, &morule);
				gnum_of_output_rules++;
				gntotal_context_len += morule.npat_len;	
			}
		}
	}
	else
	{
		gnum_of_tests++;

		//if(gdmax_pvalue>=1 || deffect_size>=gdmin_diff)
		morule.dpvalue = CalcRulePvalue(gndb_size, gptgt_stat_array, mopat_info.nsupport, mopat_info.ptgt_stat, morule.dscore, mptid_list, gptgt_values);
		//else
		//	morule.dpvalue = 1;


		if(gntgt_attr_type==CONTINUOUS || gntgt_attr_type==CONTINUOUS_NORMAL)
			OutputOnePvalue(mfppvalue, nentry_no, morule.ppat_info->nsupport, (int)(((TGT_SUM*)mopat_info.ptgt_stat)->dsum), morule.dpvalue);
		else 
			OutputOnePvalue(mfppvalue, nentry_no, morule.ppat_info->nsupport, ((int*)mopat_info.ptgt_stat)[0], morule.dpvalue);

		if((dmax_pvalue>=1 || morule.dpvalue<=dmax_pvalue) && (gsztarget_value[0]==0 || deffect_size>=gdmin_diff))
		{
			morule.npat_len = mnpat_len;

			OutputOneRule(mfptext, &morule);
			gnum_of_output_rules++;
			gntotal_context_len += morule.npat_len;	

			//output singleton rules
			if(mnpat_len>1 && bis_singleton_rule && gnoutput_mode==OUTPUT_CLOSED_N_SINGLETON)
			{
				morule.pattern = &mppattern[mnpat_len-1];
				morule.npat_len = 1;

				OutputOneRule(mfptext, &morule);
				gnum_of_output_rules++;
				gntotal_context_len += morule.npat_len;

				morule.pattern = mppattern;
				morule.npat_len = mnpat_len;
			}
		}
	}
}


//==================================================================
// Remove one item from a given rule. 
// szattr_name is the attribute of the item to be removed. 
// If szattr_name=="*", then consider all items in the rule as candidates to be removed.
//
// For every candidate item, the pattern info of "rule without that item" is
// stored in ppat_info_array[item]. Subsets found in the on-disk tree are
// written to fpout (when fpout is not NULL) through output_immd_subsets();
// subsets that are not found in the tree are handed to
// tidlist_immd_subsets(). A one-item rule is handled directly: its only
// immediate subset is the empty pattern with the global statistics.
// Unknown attributes and attributes that do not occur in the rule are logged
// and mnexplore_attr_no is set to -2.
void CTREE_EHTA::AR_RemoveOneAttr(char* szinput_name, ASSOCRULE *prule, char* szattr_name, PAT_INFO *ppat_info_array, FILE *fpout)
{
	int i, nmissing_item, num_of_found_items;
	map<string, int>::iterator map_it;
	char szerrmsg[200];

	if(prule->npat_len==1)
	{
		CopyPatInfo(0, gndb_size, gptgt_stat_array, &ppat_info_array[prule->pattern[0]]);
		return;
	}

	mpexplore_rule = prule;

	if(strcmp(szattr_name, "*")==0)
	{
		mnexplore_attr_no = -1;
		// Mark every item of the rule and reset its result slot
		// (npreorder==-1 means "subset not found yet").
		for(i=0;i<prule->npat_len;i++)
		{
			mpitem_bitmap[prule->pattern[i]] |= EXPLORE_IN_PAT;
			ppat_info_array[prule->pattern[i]].npreorder = -1;
			ppat_info_array[prule->pattern[i]].nsupport = 0;
		}

		mnpat_len = 0;
		mnum_of_shared_items = 0;

		search_immd_subsets(0, prule->npat_len, -1, ppat_info_array);

		for(i=0;i<prule->npat_len;i++)
			mpitem_bitmap[prule->pattern[i]] = 0;

		if(fpout!=NULL)
			output_immd_subsets(fpout, prule, -1, ppat_info_array);


		//searching for infrequent immediate subsets
		num_of_found_items = 0;
		for(i=0;i<prule->npat_len;i++)
		{
			if(ppat_info_array[prule->pattern[i]].npreorder>=0)
				num_of_found_items++;
		}

		if(num_of_found_items<prule->npat_len)
			tidlist_immd_subsets(szinput_name, prule, ppat_info_array, fpout);
	}
	else
	{		
		map_it = gpattr2id_map->find(szattr_name);
		if(map_it==gpattr2id_map->end())
		{
			// szattr_name comes from the caller and may be arbitrarily long;
			// snprintf keeps the message inside szerrmsg.
			snprintf(szerrmsg, sizeof(szerrmsg), "Error: cannot find attribute %s", szattr_name);
			LogErrMsg("CTREE_EHTA", "ARRemoveOneAttr", szerrmsg);
			mnexplore_attr_no = -2;
		}
		else			
		{
			mnexplore_attr_no = map_it->second;
			nmissing_item = -1;
			// Items of other attributes stay in the subset (marked in the
			// bitmap); the item of the requested attribute is the one removed.
			for(i=0;i<prule->npat_len;i++)
			{
				if(gpAttrValues[prule->pattern[i]].nattr_no!=mnexplore_attr_no)
				{
					ppat_info_array[prule->pattern[i]].npreorder = 0;
					mpitem_bitmap[prule->pattern[i]] |= EXPLORE_IN_PAT;
				}
				else
				{
					nmissing_item = prule->pattern[i];
					ppat_info_array[prule->pattern[i]].npreorder = -1;
					ppat_info_array[prule->pattern[i]].nsupport = 0;
				}
			}

			if(nmissing_item==-1)
			{
				snprintf(szerrmsg, sizeof(szerrmsg), "Error: attribute %s is not contained in the pattern", szattr_name);
				LogErrMsg("CTREE_EHTA", "ARRemoveOneAttr", szerrmsg);
				mnexplore_attr_no = -2;
			}
			else
			{
				mnpat_len = 0;
				search_exact(0, prule->npat_len-1, &ppat_info_array[nmissing_item]);

				if(fpout!=NULL && ppat_info_array[nmissing_item].npreorder>=0)
					output_immd_subsets(fpout, prule, nmissing_item, ppat_info_array);

			}
			for(i=0;i<prule->npat_len;i++)
				mpitem_bitmap[prule->pattern[i]] = 0;

			// Not found in the tree: fall back to tid-list intersection.
			if(nmissing_item>=0 && ppat_info_array[nmissing_item].npreorder==-1)
				tidlist_immd_subsets(szinput_name, prule, ppat_info_array, fpout);
		}
	}
}

// Recursively searches the on-disk tree (mfpcfp_file) for immediate subsets
// of the rule being explored (mpexplore_rule) and stores what it finds in
// ppat_info_array through CopyPatInfo().
//
//   ndisk_pos        byte offset of the node to read
//   num_of_items     number of marked rule items still to be matched below
//                    this node
//   nmissing_item    -1 while the item left out of the subset is undecided;
//                    otherwise the item whose slot in ppat_info_array receives
//                    the result
//   ppat_info_array  result array indexed by item id
//
// Items of the rule are recognised through the EXPLORE_IN_PAT bit in
// mpitem_bitmap. Node entries and target statistics are read into the shared
// buffers mpentry_buf / mptgt_stat_buf at position mndfs_entry_buf_pos, which
// is advanced for the duration of the call and restored before returning.
// NOTE(review): the return values of fread/fseek are not checked, and an
// entry count of 0 or -1 is silently ignored here.
void CTREE_EHTA::search_immd_subsets(int ndisk_pos, int num_of_items, int nmissing_item, PAT_INFO *ppat_info_array)
{
	int nfile_pos, num_of_entries;
	ENTRY *pentries;
	char *ptgt_stat_array;
	int *pitems, num_of_matched, i, nlast_index, num_of_contained, nitem;

	// Seek relative to the current position only when the cursor is not
	// already at the requested node.
	nfile_pos = ftell(mfpcfp_file);
	if(ndisk_pos!=nfile_pos)
		fseek(mfpcfp_file, ndisk_pos-nfile_pos, SEEK_CUR);
	fread(&num_of_entries, sizeof(int), 1, mfpcfp_file);

	pentries = &mpentry_buf[mndfs_entry_buf_pos];
	ptgt_stat_array = &mptgt_stat_buf[mndfs_entry_buf_pos*gntgt_stat_size];

	// Single-entry node: one ENTRY header, |num_of_entries| items, one
	// target-statistics block.
	if(num_of_entries==1 || num_of_entries<-1)
	{
		if(num_of_entries<-1)
			num_of_entries = -num_of_entries;

		// The node's items are read into the unused tail of mppattern.
		pitems = &mppattern[mnpat_len];

		fread(pentries, sizeof(ENTRY)-sizeof(int), 1, mfpcfp_file);
		fread(pitems, sizeof(int), num_of_entries, mfpcfp_file);
		fread(ptgt_stat_array, sizeof(char), gntgt_stat_size, mfpcfp_file);

		mndfs_entry_buf_pos++;

		// Items of this node that belong to the rule are pushed onto
		// mpshared_items; they are popped again at the end of this branch.
		num_of_matched = 0;
		for(i=0;i<num_of_entries;i++)
		{
			if(mpitem_bitmap[pitems[i]] & EXPLORE_IN_PAT)
			{
				mpshared_items[mnum_of_shared_items++] = pitems[i];
				num_of_matched++;
			}
		}

		if(nmissing_item==-1)
		{
			// All remaining rule items are in this node.
			if(num_of_items-num_of_matched==0)
			{
				if(num_of_matched==1)	
				{
					// Only the shared items collected before this node are
					// updated (the last one, matched here, is excluded).
					for(i=0;i<mnum_of_shared_items-1;i++)
						CopyPatInfo(pentries->npreorder, pentries->support, ptgt_stat_array, &ppat_info_array[mpshared_items[i]]);
				}
				else if(num_of_matched>1)
				{
					for(i=0;i<mnum_of_shared_items;i++)
						CopyPatInfo(pentries->npreorder, pentries->support, ptgt_stat_array, &ppat_info_array[mpshared_items[i]]);
				}
				else //num_of_matched==0
					printf("Error: there should be at least one item matched\n");
			}
			// Exactly one rule item is still unmatched after this node.
			else if(num_of_items-num_of_matched==1)
			{
				if(num_of_matched>0)
				{
					if(mnum_of_shared_items>0)
						memcpy(&mppattern[mnpat_len], mpshared_items, sizeof(int)*mnum_of_shared_items);
					nitem = get_missing_item(mpexplore_rule, mppattern, mnpat_len+mnum_of_shared_items, mpitem_bitmap);
					CopyPatInfo(pentries->npreorder, pentries->support, ptgt_stat_array, &ppat_info_array[nitem]);
				}
				if(pentries->child!=0 && mnum_of_shared_items>0)
					search_immd_subsets(pentries->child, num_of_items-num_of_matched, nmissing_item, ppat_info_array);
			}
			else if(pentries->child!=0)
				search_immd_subsets(pentries->child, num_of_items-num_of_matched, nmissing_item, ppat_info_array);

		}
		else if(num_of_items-num_of_matched==0)
			CopyPatInfo(pentries->npreorder, pentries->support, ptgt_stat_array, &ppat_info_array[nmissing_item]);
		else if(pentries->child!=0)
			search_immd_subsets(pentries->child, num_of_items-num_of_matched, nmissing_item, ppat_info_array);

		mnum_of_shared_items -= num_of_matched;
		mndfs_entry_buf_pos--;
	}
	// Multi-entry node: num_of_entries ENTRY records followed by one
	// target-statistics block per entry.
	else if(num_of_entries>1)
	{
		mndfs_entry_buf_pos += num_of_entries;
		fread(pentries, sizeof(ENTRY), num_of_entries, mfpcfp_file);
		fread(ptgt_stat_array, sizeof(char), num_of_entries*gntgt_stat_size, mfpcfp_file);

		// Count the entries whose item is in the rule and remember the last one.
		nlast_index = -1;
		num_of_contained = 0;
		for(i=0;i<num_of_entries;i++)
		{
			if(mpitem_bitmap[pentries[i].item] & EXPLORE_IN_PAT)
			{
				nlast_index = i;
				num_of_contained++;
			}
		}
		if(nlast_index>=0)
		{
			// Subset that leaves out the item of the last matching entry:
			// continue through the matching entry just before it.
			if(nmissing_item==-1 && num_of_items>1 && num_of_contained==num_of_items)
			{
				for(i=nlast_index-1;i>=0;i--) //only the entry preceeding nlast_index need to be considered
				{
					if(mpitem_bitmap[pentries[i].item] & EXPLORE_IN_PAT)
					{
						mppattern[mnpat_len++] = pentries[i].item;
						if(num_of_items==2)
							CopyPatInfo(pentries[i].npreorder, pentries[i].support, &ptgt_stat_array[i*gntgt_stat_size], &ppat_info_array[pentries[nlast_index].item]);
						else if(pentries[i].child!=0)
							search_immd_subsets(pentries[i].child, num_of_items-2, pentries[nlast_index].item, ppat_info_array);
						mnpat_len--;
						break;
					}
				}
			}

			// Subsets that contain the item of the last matching entry.
			mppattern[mnpat_len++] = pentries[nlast_index].item;
			if(nmissing_item==-1)
			{
				if(num_of_items==1)
				{
					if(mnum_of_shared_items>0)
					{
						for(i=0;i<mnum_of_shared_items;i++)
							CopyPatInfo(pentries[nlast_index].npreorder, pentries[nlast_index].support, &ptgt_stat_array[nlast_index*gntgt_stat_size], &ppat_info_array[mpshared_items[i]]);
					}
				}
				else if(num_of_items==2)
				{
					if(mnum_of_shared_items>0)
						memcpy(&mppattern[mnpat_len], mpshared_items, mnum_of_shared_items*sizeof(int));
					nitem = get_missing_item(mpexplore_rule, mppattern, mnpat_len+mnum_of_shared_items, mpitem_bitmap);
					CopyPatInfo(pentries[nlast_index].npreorder, pentries[nlast_index].support, &ptgt_stat_array[nlast_index*gntgt_stat_size], &ppat_info_array[nitem]);

					if(pentries[nlast_index].child!=0 && num_of_contained==2 && mnum_of_shared_items>0)
						search_immd_subsets(pentries[nlast_index].child, num_of_items-1, nmissing_item, ppat_info_array);
				}
				else if(pentries[nlast_index].child!=0 && num_of_contained>=num_of_items-1)
					search_immd_subsets(pentries[nlast_index].child, num_of_items-1, nmissing_item, ppat_info_array);
			}
			else if(num_of_items==1)
				CopyPatInfo(pentries[nlast_index].npreorder, pentries[nlast_index].support, &ptgt_stat_array[nlast_index*gntgt_stat_size], &ppat_info_array[nmissing_item]);
			else if(pentries[nlast_index].child!=0 && num_of_contained==num_of_items)
				search_immd_subsets(pentries[nlast_index].child, num_of_items-1, nmissing_item, ppat_info_array);
			mnpat_len--;
		}
		mndfs_entry_buf_pos -= num_of_entries;
	}
}

bool CTREE_EHTA::search_exact(int ndisk_pos, int num_of_items, PAT_INFO *ppat_info)
{
	int nfile_pos, num_of_entries;
	ENTRY *pentries;
	char *ptgt_stat_array;
	int *pitems, num_of_matched, nlast_index, i, num_of_contained;
	bool bfound;

	nfile_pos = ftell(mfpcfp_file);
	if(ndisk_pos!=nfile_pos)
		fseek(mfpcfp_file, ndisk_pos-nfile_pos, SEEK_CUR);
	fread(&num_of_entries, sizeof(int), 1, mfpcfp_file);

	pentries = &mpentry_buf[mndfs_entry_buf_pos];
	ptgt_stat_array = &mptgt_stat_buf[mndfs_entry_buf_pos*gntgt_stat_size];

	bfound = false;

	if(num_of_entries==1 || num_of_entries<-1)
	{
		if(num_of_entries<-1)
			num_of_entries = -num_of_entries;

		pitems = &mppattern[mnpat_len];
		fread(pentries, sizeof(ENTRY)-sizeof(int), 1, mfpcfp_file);
		fread(pitems, sizeof(int), num_of_entries, mfpcfp_file);
		fread(ptgt_stat_array, sizeof(char), gntgt_stat_size, mfpcfp_file);
		mndfs_entry_buf_pos++;

		num_of_matched = 0;
		for(i=0;i<num_of_entries;i++)
		{
			if(mpitem_bitmap[pitems[i]]==1)
			{
				mppattern[mnpat_len++] = pitems[i];
				num_of_matched++;
			}
		}

		if(num_of_matched==num_of_items)
		{
			bfound = true;
			CopyPatInfo(pentries->npreorder, pentries->support, ptgt_stat_array, ppat_info);
		}
		else if(pentries->child!=0)
			bfound = search_exact(pentries->child, num_of_items-num_of_matched, ppat_info);
		mnpat_len -= num_of_matched;
		mndfs_entry_buf_pos--;
	}
	else if(num_of_entries>1)
	{
		bfound = false;

		mndfs_entry_buf_pos += num_of_entries;
		fread(pentries, sizeof(ENTRY), num_of_entries, mfpcfp_file);
		fread(ptgt_stat_array, sizeof(char), num_of_entries*gntgt_stat_size, mfpcfp_file);

		nlast_index = -1;
		num_of_contained = 0;
		for(i=0;i<num_of_entries;i++)
		{
			if(mpitem_bitmap[pentries[i].item]==1)
			{
				nlast_index = i;
				num_of_contained++;
			}
		}

		if(num_of_contained==num_of_items)
		{
			mppattern[mnpat_len++] = pentries[nlast_index].item;

			if(num_of_items==1)
			{
				bfound = true;
				CopyPatInfo(pentries[nlast_index].npreorder, pentries[nlast_index].support, &ptgt_stat_array[nlast_index*gntgt_stat_size], ppat_info);
			}
			else if(pentries[nlast_index].child!=0)
				bfound = search_exact(pentries[nlast_index].child, num_of_items-1, ppat_info);
			mnpat_len--;
		}
		mndfs_entry_buf_pos -= num_of_entries;
	}
	else 
		printf("Error with number of entries\n");

	return bfound;
}

int comp_tidlist_nodes_status(const void *e1, const void* e2)
{
	TIDLIST_NODE *p1, *p2;

	p1 = (TIDLIST_NODE*) e1;
	p2 = (TIDLIST_NODE*) e2;

	if(p1->nstatus> p2->nstatus)
		return -1;
	else if(p1->nstatus< p2->nstatus)
		return 1;
	else if(p1->nitem_tidlist_len < p2->nitem_tidlist_len)
		return -1;
	else if(p1->nitem_tidlist_len < p2->nitem_tidlist_len)
		return 1;
	else 
		return 0;
}

int comp_tidlist_nodes_item(const void *e1, const void* e2)
{
	TIDLIST_NODE *p1, *p2;

	p1 = (TIDLIST_NODE*) e1;
	p2 = (TIDLIST_NODE*) e2;

	if(p1->nitem< p2->nitem)
		return -1;
	else if(p1->nitem> p2->nitem)
		return 1;
	else 
		return 0;
}

// Computes, by intersecting item tid lists, the pattern info of those
// immediate subsets of prule that were not found in the tree (their slot in
// ppat_info_array has npreorder<0), and writes each one to fpout when fpout
// is not NULL.
//
//   szinput_name     file-name prefix; "<prefix>.item.tidlist" holds the
//                    per-item tid lists
//   prule            the rule whose immediate subsets are wanted
//   ppat_info_array  in/out, indexed by item id: slot [x] describes
//                    "prule without item x"
//   fpout            optional output stream for the resulting rules
//
// Nodes are classified by nstatus: 1 = subset already found in the tree
// (these items form the common "prefix" of every missing subset), 2 = item
// occurs in every transaction (its removal does not change the tid list),
// 0 = subset still to be computed.
void CTREE_EHTA::tidlist_immd_subsets(char* szinput_name, ASSOCRULE *prule, PAT_INFO *ppat_info_array, FILE *fpout)
{
	FILE *fp;
	char sztidlist_filename[200], szerrmsg[200];	
	TIDLIST_NODE *pnodes;
	int i, j, nload_status, nprefix_len, nprefix_tidlist_len, *pprefix_tidlist;
	int ntidlist_len, nmax_tidlist_len, *ptemp_tidlist, nresult_len, *presult_tidlist, nitem, nstart_pos;
	int nsubset_len, *psubset_tidlist;
	bool btgt_value_loaded;
	double dscore;

	// Bounded formatting: neither the file name nor the error message can
	// overrun its 200-byte buffer.
	snprintf(sztidlist_filename, sizeof(sztidlist_filename), "%s.item.tidlist", szinput_name);
	fp = fopen(sztidlist_filename, "rb");
	if(fp==NULL)
	{
		snprintf(szerrmsg, sizeof(szerrmsg), "Error: cannot open file %s for read.", sztidlist_filename);
		LogErrMsg("", "LoadOneItemTidlist", szerrmsg);
		return;
	}

	pnodes = new TIDLIST_NODE[prule->npat_len];
	IncMemSize(sizeof(TIDLIST_NODE)*prule->npat_len);
	memset(pnodes, 0, sizeof(TIDLIST_NODE)*prule->npat_len);

	btgt_value_loaded = false;

	nprefix_len = 0;
	for(i=0;i<prule->npat_len;i++)
	{
		pnodes[i].nitem = prule->pattern[i];
		if(ppat_info_array[prule->pattern[i]].npreorder>=0)
		{
			pnodes[i].nstatus = 1;
			nprefix_len++;
		}
	}

	if(nprefix_len<prule->npat_len)
	{
		qsort(pnodes, prule->npat_len, sizeof(TIDLIST_NODE), comp_tidlist_nodes_item);

		nload_status = LoadItemListPos(szinput_name, pnodes, prule->npat_len);
		if(nload_status==0)
		{
			if(gptgt_values==NULL)
			{
				btgt_value_loaded = true;
				LoadTransTgtValues(szinput_name);
			}
			else
				btgt_value_loaded = false;

			// Every rule written by this function is an immediate subset, so
			// its length is fixed up front; previously it was only set on one
			// of the paths below and could be stale on the others.
			morule.npat_len = prule->npat_len-1;

			// Items that occur in every transaction: removing them leaves the
			// rule's statistics unchanged.
			nstart_pos = 0;
			for(i=0;i<prule->npat_len;i++)
			{
				if(pnodes[i].nstatus==0 && pnodes[i].nitem_tidlist_len==gndb_size)
				{
					pnodes[i].nstatus = 2;
					nitem = pnodes[i].nitem;
					CopyPatInfo(prule->ppat_info, &ppat_info_array[nitem]);
					if(fpout!=NULL)
					{
						for(j=0;j<i;j++)
							morule.pattern[j] = pnodes[j].nitem;
						for(j=i+1;j<prule->npat_len;j++)
							morule.pattern[j-1] = pnodes[j].nitem;
						morule.ppat_info = &ppat_info_array[nitem];
						morule.dpvalue = prule->dpvalue;
						morule.dscore = prule->dscore;
						morule.dcond_pvalue = 1;
						OutputOneRule(fpout, &morule);
					}
					nstart_pos++;
				}
			}

			// After this sort the layout is: status-2 nodes
			// [0, nstart_pos), then the status-1 prefix, then status-0 nodes.
			qsort(pnodes, prule->npat_len, sizeof(TIDLIST_NODE), comp_tidlist_nodes_status);
			nmax_tidlist_len = 0;
			for(i=0;i<prule->npat_len;i++)
			{
				morule.pattern[i] = pnodes[i].nitem;
				if(nmax_tidlist_len<pnodes[i].nitem_tidlist_len && pnodes[i].nitem_tidlist_len<gndb_size)
					nmax_tidlist_len = pnodes[i].nitem_tidlist_len;
			}

			pprefix_tidlist = NewIntArray(pnodes[nstart_pos].nitem_tidlist_len);
			ptemp_tidlist = NewIntArray(nmax_tidlist_len);
			presult_tidlist = NewIntArray(nmax_tidlist_len);

			if(nprefix_len>0)
				nprefix_tidlist_len = GenTidList(fp, &pnodes[nstart_pos], nprefix_len, pprefix_tidlist, ptemp_tidlist, prule->ppat_info->nsupport);
			else
				nprefix_tidlist_len = 0;

			nprefix_len += nstart_pos;
			if(nprefix_tidlist_len==prule->ppat_info->nsupport)
			{
				// The prefix alone already has the rule's support, so every
				// missing subset has exactly the rule's statistics.
				for(i=prule->npat_len-1;i>=nprefix_len;i--)
				{
					// Fixed: the slot filled is the one of the item removed in
					// this iteration (it used to be the last node's item on
					// every iteration).
					nitem = pnodes[i].nitem;
					CopyPatInfo(prule->ppat_info, &ppat_info_array[nitem]);
					if(fpout!=NULL)
					{
						for(j=i+1;j<prule->npat_len;j++)
							morule.pattern[j-1] = pnodes[j].nitem;
						morule.ppat_info = &ppat_info_array[nitem];
						morule.dpvalue = prule->dpvalue;
						morule.dscore = prule->dscore;
						morule.dcond_pvalue = CalcRulePvalue(ppat_info_array[nitem].nsupport, ppat_info_array[nitem].ptgt_stat, prule->ppat_info->nsupport, prule->ppat_info->ptgt_stat, dscore);
						OutputOneRule(fpout, &morule);
					}
				}
			}
			else if(nprefix_len<prule->npat_len)
			{
				// Forward pass: pnodes[i].ptid_list becomes the tid list of
				// prefix + status-0 items up to and including i.
				ntidlist_len = -1;
				for(i=nprefix_len;i<prule->npat_len-1;i++)
				{
					pnodes[i].pitem_tid_list = NewIntArray(pnodes[i].nitem_tidlist_len);
					LoadOneItemTidlist(fp, pnodes[i].ndisk_pos, pnodes[i].pitem_tid_list, pnodes[i].nitem_tidlist_len);
					if(i>nprefix_len && ntidlist_len==prule->ppat_info->nsupport)
						pnodes[i].ntidlist_len = ntidlist_len; 
					else 
					{
						if(i==nprefix_len)
						{
							if(nprefix_tidlist_len>0)
								ntidlist_len = get_intersection(nprefix_tidlist_len, pprefix_tidlist, pnodes[i].nitem_tidlist_len, pnodes[i].pitem_tid_list, ptemp_tidlist);
							else
							{
								ntidlist_len = pnodes[i].nitem_tidlist_len;
								memcpy(ptemp_tidlist, pnodes[i].pitem_tid_list, sizeof(int)*pnodes[i].nitem_tidlist_len);
							}
						}
						else //if(ntidlist_len>prule->ppat_info->nsupport)
							ntidlist_len = get_intersection(ntidlist_len, ptemp_tidlist, pnodes[i].nitem_tidlist_len, pnodes[i].pitem_tid_list, ptemp_tidlist);
						pnodes[i].ptid_list = NewIntArray(ntidlist_len);
						memcpy(pnodes[i].ptid_list, ptemp_tidlist, sizeof(int)*ntidlist_len);
						pnodes[i].ntidlist_len = ntidlist_len;
					}	
				}

				// Subset that leaves out the last node's item.
				morule.npat_len = prule->npat_len-1;
				nitem = pnodes[prule->npat_len-1].nitem;
				if(ntidlist_len==prule->ppat_info->nsupport)
				{
					CopyPatInfo(prule->ppat_info, &ppat_info_array[nitem]);
					if(fpout!=NULL)
					{
						morule.ppat_info = &ppat_info_array[nitem];
						morule.dpvalue = prule->dpvalue;
						morule.dscore = prule->dscore;
						morule.dcond_pvalue = 1;
						OutputOneRule(fpout, &morule);
					}
				}
				else if(prule->npat_len>nprefix_len+1 || nprefix_tidlist_len>0)
				{
					// Pick the tid list that actually describes the subset and
					// use the same list for both the pattern info and the
					// p-value (the p-value used to be computed from
					// ptemp_tidlist even when the prefix list was the source).
					if(prule->npat_len>nprefix_len+1)
					{
						psubset_tidlist = ptemp_tidlist;
						nsubset_len = ntidlist_len;
					}
					else
					{
						psubset_tidlist = pprefix_tidlist;
						nsubset_len = nprefix_tidlist_len;
					}
					get_pat_info(psubset_tidlist, nsubset_len, &ppat_info_array[nitem]);
					if(fpout!=NULL)
					{
						morule.ppat_info = &ppat_info_array[nitem];
						morule.dpvalue = CalcRulePvalue(gndb_size, gptgt_stat_array, ppat_info_array[nitem].nsupport, ppat_info_array[nitem].ptgt_stat, morule.dscore, psubset_tidlist, gptgt_values);
						morule.dcond_pvalue = CalcRulePvalue(ppat_info_array[nitem].nsupport, ppat_info_array[nitem].ptgt_stat, prule->ppat_info->nsupport, prule->ppat_info->ptgt_stat, dscore);
						OutputOneRule(fpout, &morule);
					}
				}

				// Backward pass: ptemp_tidlist accumulates the intersection of
				// the items after i; combined with the forward list of i-1 (or
				// the prefix) it yields the tid list of "rule without item i".
				ntidlist_len = pnodes[prule->npat_len-1].nitem_tidlist_len;
				LoadOneItemTidlist(fp, pnodes[prule->npat_len-1].ndisk_pos, ptemp_tidlist, ntidlist_len);
				for(i=prule->npat_len-2;i>=nprefix_len;i--)
				{
					nitem = pnodes[i].nitem;
					if(ntidlist_len==prule->ppat_info->nsupport|| (i>nprefix_len && pnodes[i-1].ntidlist_len==prule->ppat_info->nsupport))
					{
						CopyPatInfo(prule->ppat_info, &ppat_info_array[nitem]);
						if(fpout!=NULL)
						{
							for(j=i+1;j<prule->npat_len;j++)
								morule.pattern[j-1] = pnodes[j].nitem;
							morule.dpvalue = prule->dpvalue;
							morule.dscore = prule->dscore;
							morule.ppat_info = &ppat_info_array[nitem];
							morule.dcond_pvalue = 1;
							OutputOneRule(fpout, &morule);
						}
					}
					else 
					{
						if(i>nprefix_len)
							nresult_len = get_intersection(ntidlist_len, ptemp_tidlist, pnodes[i-1].ntidlist_len, pnodes[i-1].ptid_list, presult_tidlist);
						else if(nprefix_tidlist_len>0)
							nresult_len = get_intersection(ntidlist_len, ptemp_tidlist, nprefix_tidlist_len, pprefix_tidlist, presult_tidlist);
						else
						{
							nresult_len = ntidlist_len;
							memcpy(presult_tidlist, ptemp_tidlist, sizeof(int)*ntidlist_len);
						}
						get_pat_info(presult_tidlist, nresult_len, &ppat_info_array[nitem]);		
						
						if(fpout!=NULL)
						{
							for(j=i+1;j<prule->npat_len;j++)
								morule.pattern[j-1] = pnodes[j].nitem;
							morule.dpvalue = CalcRulePvalue(gndb_size, gptgt_stat_array, ppat_info_array[nitem].nsupport, ppat_info_array[nitem].ptgt_stat, morule.dscore, presult_tidlist, gptgt_values);
							morule.ppat_info = &ppat_info_array[nitem];
							morule.dcond_pvalue = CalcRulePvalue(ppat_info_array[nitem].nsupport, ppat_info_array[nitem].ptgt_stat, prule->ppat_info->nsupport, prule->ppat_info->ptgt_stat, dscore);
							OutputOneRule(fpout, &morule);
						}
					}
					if(i>nprefix_len && ntidlist_len>prule->ppat_info->nsupport)
						ntidlist_len = get_intersection(ntidlist_len, ptemp_tidlist, pnodes[i].nitem_tidlist_len, pnodes[i].pitem_tid_list, ptemp_tidlist);
				}
			}

			// Release with the same length the prefix buffer was allocated
			// with (it used to be released with pnodes[0]'s length).
			DelIntArray(pprefix_tidlist, pnodes[nstart_pos].nitem_tidlist_len);
			DelIntArray(ptemp_tidlist, nmax_tidlist_len);
			DelIntArray(presult_tidlist, nmax_tidlist_len);

			if(btgt_value_loaded)
				DelTransTgtValues();

			for(i=nprefix_len;i<prule->npat_len;i++)
			{
				if(pnodes[i].pitem_tid_list!=NULL)
					DelIntArray(pnodes[i].pitem_tid_list, pnodes[i].nitem_tidlist_len);
				if(pnodes[i].ptid_list!=NULL)
					DelIntArray(pnodes[i].ptid_list, pnodes[i].ntidlist_len);
			}
		}
	}
	else
		LogErrMsg("CTREE_EHTA", "tidlist_immd_subsets", "Error: All items have been covered.");
	
	delete []pnodes;
	DecMemSize(sizeof(TIDLIST_NODE)*prule->npat_len);

	fclose(fp);
}

void CTREE_EHTA::output_one_freq_rule(FILE *fpout, ASSOCRULE *prule, PAT_INFO *ppat_info)
{
	if(ppat_info->nsupport==prule->ppat_info->nsupport)
	{
		morule.dpvalue = prule->dpvalue;
		morule.dscore = prule->dscore;
	}
	else
	{
		if(gntgt_attr_type==CONTINUOUS && gnmin_sup<CLT_THRESHOLD)
			LoadOneTidList(mfp_tidlist_dir, mfp_tidlist, ppat_info->npreorder, mptid_list, ppat_info->nsupport);
		morule.dpvalue = CalcRulePvalue(gndb_size, gptgt_stat_array, ppat_info->nsupport, ppat_info->ptgt_stat, morule.dscore, mptid_list, gptgt_values);
	}
	morule.ppat_info = ppat_info;
	OutputOneRule(fpout, &morule);
}

void CTREE_EHTA::output_immd_subsets(FILE *fpout, ASSOCRULE *prule, int nremoved_item, PAT_INFO *ppat_info_array)
{
	int i, j, nitem;
	double dscore;

	morule.npat_len = prule->npat_len-1;

	if(nremoved_item>=0)
	{
		j = 0;
		for(i=0;i<prule->npat_len;i++)
		{
			if(prule->pattern[i]!=nremoved_item)
				morule.pattern[j++] = prule->pattern[i];
		}
		morule.dcond_pvalue = CalcRulePvalue(ppat_info_array[nremoved_item].nsupport, ppat_info_array[nremoved_item].ptgt_stat, prule->ppat_info->nsupport, prule->ppat_info->ptgt_stat, dscore);
		output_one_freq_rule(fpout, prule, &ppat_info_array[nremoved_item]);
	}
	else
	{

		memcpy(morule.pattern, prule->pattern, morule.npat_len*sizeof(int));

		for(i=prule->npat_len-1;i>=0;i--)
		{
			nitem = prule->pattern[i];
			if(ppat_info_array[nitem].npreorder>=0)
			{
				for(j=i+1;j<prule->npat_len;j++)
					morule.pattern[j-1] = prule->pattern[j];

				morule.dcond_pvalue = CalcRulePvalue(ppat_info_array[nitem].nsupport, ppat_info_array[nitem].ptgt_stat, prule->ppat_info->nsupport, prule->ppat_info->ptgt_stat, dscore);
				output_one_freq_rule(fpout, prule, &ppat_info_array[nitem]);	
			}
		}
	}
}


// Add one item to a given rule.
// szattr_name is the attribute of the item to be added.
// If szattr_name=="*", all items outside of the rule are candidates.
//   szinput_name    - base name of the input files, passed on to tidlist_immd_supersets
//   prule           - the rule to extend
//   ppat_info_array - indexed by item id; receives the statistics of "rule + item"
//   fpout           - output file; may be NULL, in which case nothing is written
// mpitem_bitmap is used as scratch space: 1 marks an item of the rule, 2 marks
// a candidate item. All marks set here are cleared again before returning.
void CTREE_EHTA::AR_AddOneAttr(char* szinput_name, ASSOCRULE *prule, char* szattr_name, PAT_INFO *ppat_info_array, FILE *fpout)
{
	int i, num_of_extra_items, num_of_extra_items_covered;
	map<string, int>::iterator map_it;
	char szerrmsg[200];

	for(i=0;i<prule->npat_len;i++)
		mpitem_bitmap[prule->pattern[i]] = 1;

	// Resolve the attribute to explore: -1 = all attributes, -2 = error.
	if(strcmp(szattr_name, "*")==0)
		mnexplore_attr_no = -1;
	else
	{		
		map_it = gpattr2id_map->find(szattr_name);
		if(map_it==gpattr2id_map->end())
		{
			// The name comes from the caller; the precision keeps the message
			// inside szerrmsg even for very long names.
			sprintf(szerrmsg, "Error: cannot find attribute %.150s", szattr_name);
			LogErrMsg("CTREE_EHTA", "ARAddOneAttr", szerrmsg);
			mnexplore_attr_no = -2;
		}
		else
		{
			mnexplore_attr_no = map_it->second;
			// The attribute to add must not already be used by the rule.
			for(i=0;i<prule->npat_len;i++)
			{
				if(mnexplore_attr_no==gpAttrValues[prule->pattern[i]].nattr_no)
				{
					sprintf(szerrmsg, "Error: attribute %.150s should not appear in the rule", szattr_name);
					LogErrMsg("CTREE_EHTA", "ARAddOneAttr", szerrmsg);
					mnexplore_attr_no = -2;
					break;
				}
			}
		}
	}

	// Mark the candidate items with 2 and flag them as "not found yet"
	// (npreorder = -1).
	num_of_extra_items = 0; 
	if(mnexplore_attr_no==-1)
	{
		for(i=0;i<gnum_of_items;i++)
		{
			if(mpitem_bitmap[i]==0)
			{
				mpitem_bitmap[i] = 2;
				ppat_info_array[i].npreorder = -1;
				num_of_extra_items++;
			}
		}
	}
	else if(mnexplore_attr_no>=0)
	{
		for(i=gpAttributes[mnexplore_attr_no].nstart_item_id;i<gpAttributes[mnexplore_attr_no].nstart_item_id+gpAttributes[mnexplore_attr_no].num_of_values;i++)
		{
			mpitem_bitmap[i] = 2;
			ppat_info_array[i].npreorder = -1;
			num_of_extra_items++;
		}
	}

	if(mnexplore_attr_no>=-1)
	{
		mnpat_len = 0;
		mnum_of_shared_items = 0;

		// First pass: look the supersets up in the pattern tree file.
		search_immd_supersets(0, prule->npat_len, -1, ppat_info_array);

		if(fpout!=NULL)
			output_immd_supersets(fpout, prule, mnexplore_attr_no, ppat_info_array);

		
		//searching for infrequent itemsets
		// Count the candidates the tree search resolved; the rest are computed
		// from the item tid lists.
		num_of_extra_items_covered = 0;
		if(mnexplore_attr_no==-1)
		{
			for(i=0;i<gnum_of_items;i++)
			{
				if(mpitem_bitmap[i]==2 && ppat_info_array[i].npreorder>=0)
					num_of_extra_items_covered++;
			}
		}
		else if(mnexplore_attr_no>=0)
		{
			for(i=gpAttributes[mnexplore_attr_no].nstart_item_id;i<gpAttributes[mnexplore_attr_no].nstart_item_id+gpAttributes[mnexplore_attr_no].num_of_values;i++)
			{
				if(ppat_info_array[i].npreorder>=0)
					num_of_extra_items_covered++;
			}
		}

		if(num_of_extra_items_covered<num_of_extra_items)
			tidlist_immd_supersets(szinput_name, prule, mnexplore_attr_no, ppat_info_array, fpout);
	}


	// Restore mpitem_bitmap. The rule items are cleared on the error path
	// (-2) as well; otherwise their marks would leak into the next search.
	if(mnexplore_attr_no==-1)
		memset(mpitem_bitmap, 0, gnum_of_items);
	else
	{
		for(i=0;i<prule->npat_len;i++)
			mpitem_bitmap[prule->pattern[i]] = 0;
		if(mnexplore_attr_no>=0)
		{
			for(i=gpAttributes[mnexplore_attr_no].nstart_item_id;i<gpAttributes[mnexplore_attr_no].nstart_item_id+gpAttributes[mnexplore_attr_no].num_of_values;i++)
				mpitem_bitmap[i] = 0;
		}
	}
}

// Recursively walks the pattern tree stored in mfpcfp_file, looking for
// patterns made of the rule items (mpitem_bitmap==1) plus one candidate item
// (mpitem_bitmap==2), and stores their statistics with CopyPatInfo.
//   ndisk_pos       - file offset of the node to read
//   num_of_items    - number of rule items still to be matched below this node
//   nextra_item     - candidate item already on the current path, or -1 if none
//   ppat_info_array - indexed by candidate item id; receives the statistics
// Uses the member buffers mpentry_buf / mptgt_stat_buf (position
// mndfs_entry_buf_pos), mppattern (length mnpat_len) and mpshared_items
// (length mnum_of_shared_items) as a stack; each is restored before returning.
// NOTE(review): fread results are not checked; a truncated file goes unnoticed.
void CTREE_EHTA::search_immd_supersets(int ndisk_pos, int num_of_items, int nextra_item, PAT_INFO *ppat_info_array)
{
	int nfile_pos, num_of_entries;
	ENTRY *pentries;
	char *ptgt_stat_array;
	int *pitems, num_of_matched, nlast_index, i, num_of_contained, norig_shared_items;

	// Move to the node with a relative seek and read its entry count.
	nfile_pos = ftell(mfpcfp_file);
	if(ndisk_pos!=nfile_pos)
		fseek(mfpcfp_file, ndisk_pos-nfile_pos, SEEK_CUR);
	fread(&num_of_entries, sizeof(int), 1, mfpcfp_file);

	pentries = &mpentry_buf[mndfs_entry_buf_pos];
	ptgt_stat_array = &mptgt_stat_buf[mndfs_entry_buf_pos*gntgt_stat_size];

	// A count of 1, or a negative count below -1, is read as a single entry
	// that carries |num_of_entries| items.
	if(num_of_entries==1 || num_of_entries<-1)
	{
		if(num_of_entries<-1)
			num_of_entries = -num_of_entries;

		// The items are read straight into the free tail of mppattern.
		pitems = &mppattern[mnpat_len];
		fread(pentries, sizeof(ENTRY)-sizeof(int), 1, mfpcfp_file);
		fread(pitems, sizeof(int), num_of_entries, mfpcfp_file);
		fread(ptgt_stat_array, sizeof(char), gntgt_stat_size, mfpcfp_file);
		mndfs_entry_buf_pos++;

		norig_shared_items = mnum_of_shared_items;

		if(num_of_items>0)
		{
			// Rule items are compacted in place at the front of pitems;
			// candidate items are pushed onto mpshared_items.
			num_of_matched = 0;
			for(i=0;i<num_of_entries;i++)
			{
				if(mpitem_bitmap[pitems[i]]==1)
				{
					mppattern[mnpat_len++] = pitems[i];
					num_of_matched++;
				}
				else if(mpitem_bitmap[pitems[i]]==2)
					mpshared_items[mnum_of_shared_items++] = pitems[i];
			}

			if(num_of_matched==num_of_items)
			{
				// All rule items are matched at this entry.
				if(nextra_item==-1)
				{
					// Every candidate collected on the path gets this entry's statistics.
					for(i=0;i<mnum_of_shared_items;i++)
						CopyPatInfo(pentries->npreorder, pentries->support, ptgt_stat_array, &ppat_info_array[mpshared_items[i]]);

					// Continue below with nothing left to match.
					if(pentries->child!=0)
						search_immd_supersets(pentries->child, 0, -1, ppat_info_array);
				}
				else
					CopyPatInfo(pentries->npreorder, pentries->support, ptgt_stat_array, &ppat_info_array[nextra_item]);
			}
			else if(pentries->child!=0)
				search_immd_supersets(pentries->child, num_of_items-num_of_matched, nextra_item, ppat_info_array);

			// Pop what this level pushed.
			mnpat_len -= num_of_matched;
			mnum_of_shared_items = norig_shared_items;
		}
		else 
		{
			// Nothing left to match: every candidate item in this entry is recorded.
			if(nextra_item!=-1)
				printf("Error: the value of nextra_item should be -1\n");
			for(i=0;i<num_of_entries;i++)
			{
				if(mpitem_bitmap[pitems[i]]==2)
					CopyPatInfo(pentries->npreorder, pentries->support, ptgt_stat_array, &ppat_info_array[pitems[i]]);
			}
			if(pentries->child!=0)
				search_immd_supersets(pentries->child, 0, nextra_item, ppat_info_array);
		}
		mndfs_entry_buf_pos--;
	}
	else if(num_of_entries>1)
	{
		// Node with several entries, one item each; reserve their buffer slots.
		mndfs_entry_buf_pos += num_of_entries;
		fread(pentries, sizeof(ENTRY), num_of_entries, mfpcfp_file);
		fread(ptgt_stat_array, sizeof(char), num_of_entries*gntgt_stat_size, mfpcfp_file);

		nlast_index = -1;
		num_of_contained = 0;
		if(num_of_items>0)
		{
			// Count the rule items among the entries and remember the last one.
			for(i=0;i<num_of_entries;i++)
			{
				if(mpitem_bitmap[pentries[i].item]==1)
				{
					nlast_index = i;
					num_of_contained++;
				}
			}

			// The search only continues when every remaining rule item appears
			// in this node.
			// NOTE(review): this presumably relies on the item order of the
			// tree - confirm against the tree builder.
			if(num_of_contained==num_of_items)
			{
				mppattern[mnpat_len++] = pentries[nlast_index].item;
				if(num_of_items==1)
				{
					if(nextra_item==-1)
					{
						for(i=0;i<mnum_of_shared_items;i++)
							CopyPatInfo(pentries[nlast_index].npreorder, pentries[nlast_index].support, &ptgt_stat_array[nlast_index*gntgt_stat_size], &ppat_info_array[mpshared_items[i]]);

						if(pentries[nlast_index].child!=0)
							search_immd_supersets(pentries[nlast_index].child, 0, -1, ppat_info_array);
					}
					else
						CopyPatInfo(pentries[nlast_index].npreorder, pentries[nlast_index].support, &ptgt_stat_array[nlast_index*gntgt_stat_size], &ppat_info_array[nextra_item]);
				}
				else if(pentries[nlast_index].child!=0)
					search_immd_supersets(pentries[nlast_index].child, num_of_items-1, nextra_item, ppat_info_array);
				mnpat_len--;

				// No candidate chosen yet: also descend through each candidate
				// entry after the last rule item, carrying it as the extra item.
				if(nextra_item==-1)
				{
					for(i=nlast_index+1;i<num_of_entries;i++)
					{
						if(mpitem_bitmap[pentries[i].item]==2)
						{
							mppattern[mnpat_len++] = pentries[i].item;
							if(pentries[i].child!=0)
								search_immd_supersets(pentries[i].child, num_of_items, pentries[i].item, ppat_info_array);
							mnpat_len--;
						}
					}
				}
			}
		}
		else 
		{
			// Nothing left to match: record every candidate entry of this node.
			// Unlike the single-entry case above, children are not visited here.
			if(nextra_item!=-1)
				printf("Error: the value of nextra_item should be -1\n");
			for(i=0;i<num_of_entries;i++)
			{
				if(mpitem_bitmap[pentries[i].item]==2)
				{
					mppattern[mnpat_len++] = pentries[i].item;
					CopyPatInfo(pentries[i].npreorder, pentries[i].support, &ptgt_stat_array[i*gntgt_stat_size], &ppat_info_array[pentries[i].item]);
					mnpat_len--;
				}
			}
		}
		mndfs_entry_buf_pos -= num_of_entries;
	}
}


void CTREE_EHTA::output_immd_supersets(FILE *fpout, ASSOCRULE *prule, int nextra_attr_no, PAT_INFO *ppat_info_array)
{
	double dscore;
	int i;

	morule.npat_len = prule->npat_len+1;
	memcpy(morule.pattern, prule->pattern, sizeof(int)*prule->npat_len);

	if(nextra_attr_no==-1)
	{
		for(i=0;i<gnum_of_items;i++)
		{
			if(mpitem_bitmap[i]==2 && ppat_info_array[i].npreorder>=0 && ppat_info_array[i].nsupport>0)
			{
				morule.pattern[prule->npat_len] = i;
				morule.dcond_pvalue = CalcRulePvalue(prule->ppat_info->nsupport, prule->ppat_info->ptgt_stat, ppat_info_array[i].nsupport, ppat_info_array[i].ptgt_stat, dscore);
				output_one_freq_rule(fpout, prule, &ppat_info_array[i]);
			}
		}
	}
	else 
	{
		for(i=gpAttributes[nextra_attr_no].nstart_item_id;i<gpAttributes[nextra_attr_no].nstart_item_id+gpAttributes[nextra_attr_no].num_of_values;i++)
		{
			if(ppat_info_array[i].npreorder>=0 && ppat_info_array[i].nsupport>0)
			{
				morule.pattern[prule->npat_len] = i;
				morule.dcond_pvalue = CalcRulePvalue(prule->ppat_info->nsupport, prule->ppat_info->ptgt_stat, ppat_info_array[i].nsupport, ppat_info_array[i].ptgt_stat, dscore);
				output_one_freq_rule(fpout, prule, &ppat_info_array[i]);
			}
		}
	}
}

void CTREE_EHTA::tidlist_immd_supersets(char* szinput_name, ASSOCRULE *prule, int nextra_attr_no, PAT_INFO *ppat_info_array, FILE *fpout)
{
	FILE *fp, *fpdir;
	char sztidlist_filename[200], szerrmsg[200];	
	TIDLIST_NODE *pnodes;
	int i, nprefix_tidlist_len, *pprefix_tidlist;
	int ntidlist_len, *ptemp_tidlist, nmax_tidlist_len;
	bool btgt_value_loaded;
	int nstart_pos, nend_pos, nitem_tidlist_capacity, ntidlist_disk_pos, nitem_tidlist_len;
	double dscore;

	sprintf(sztidlist_filename, "%s.item.tidlist", szinput_name);
	fp = fopen(sztidlist_filename, "rb");
	if(fp==NULL)
	{
		sprintf(szerrmsg, "Error: cannot open file %s for read.", sztidlist_filename);
		LogErrMsg("", "LoadOneItemTidlist", szerrmsg);
		return;
	}

	sprintf(sztidlist_filename, "%s.item.tidlist.dir", szinput_name);
	fpdir = fopen(sztidlist_filename, "rb");
	if(fpdir==NULL)
	{
		sprintf(szerrmsg, "Error: cannot open file %s for read.", sztidlist_filename);
		LogErrMsg("", "LoadOneItemTidlist", szerrmsg);
		return;
	}


	pprefix_tidlist = NULL;
	nprefix_tidlist_len = 0;

	if(prule->npat_len>0 && prule->ppat_info->nsupport<gndb_size)
	{
		pnodes = new TIDLIST_NODE[prule->npat_len];
		IncMemSize(sizeof(TIDLIST_NODE)*prule->npat_len);
		memset(pnodes, 0, sizeof(TIDLIST_NODE)*prule->npat_len);
		for(i=0;i<prule->npat_len;i++)
			pnodes[i].nitem = prule->pattern[i];

		qsort(pnodes, prule->npat_len, sizeof(TIDLIST_NODE), comp_tidlist_nodes_item);
		nmax_tidlist_len = 0;
		for(i=0;i<prule->npat_len;i++)
		{
			pnodes[i].ndisk_pos = LoadOneItemListPos(fpdir, pnodes[i].nitem, pnodes[i].nitem_tidlist_len);
			if(nmax_tidlist_len<pnodes[i].nitem_tidlist_len && pnodes[i].nitem_tidlist_len<gndb_size)
				nmax_tidlist_len = pnodes[i].nitem_tidlist_len;
		}

		nitem_tidlist_capacity = nmax_tidlist_len;
		ptemp_tidlist = NewIntArray(nitem_tidlist_capacity); 

		pprefix_tidlist = NewIntArray(nmax_tidlist_len);

		nprefix_tidlist_len = GenTidList(fp, pnodes, prule->npat_len, pprefix_tidlist, ptemp_tidlist, prule->ppat_info->nsupport);
		if(nprefix_tidlist_len!=prule->ppat_info->nsupport)
			LogErrMsg("CTREE_EHTA", "tidlist_immd_supersets", "Tid list length is inconsistent with pattern support.");

		delete []pnodes;
		DecMemSize(sizeof(TIDLIST_NODE)*prule->npat_len);
	}
	else
	{
		nitem_tidlist_capacity = 1000;
		ptemp_tidlist = NewIntArray(nitem_tidlist_capacity); 
	}

	if(gptgt_values==NULL)
	{
		btgt_value_loaded = true;
		LoadTransTgtValues(szinput_name);
	}
	else
		btgt_value_loaded = false;

	if(nextra_attr_no==-1)
	{
		nstart_pos = 0;
		nend_pos = gnum_of_items;
	}
	else
	{
		nstart_pos = gpAttributes[nextra_attr_no].nstart_item_id;
		nend_pos = nstart_pos+gpAttributes[nextra_attr_no].num_of_values;
	}

	for(i=nstart_pos;i<nend_pos;i++)
	{
		if(mpitem_bitmap[i]==2 && ppat_info_array[i].npreorder==-1)
		{
			ntidlist_disk_pos = LoadOneItemListPos(fpdir, i, nitem_tidlist_len);

			if(nitem_tidlist_len>0)
			{
				if(nitem_tidlist_len>nitem_tidlist_capacity)
					nitem_tidlist_capacity = ResizeArray(ptemp_tidlist, nitem_tidlist_capacity, nitem_tidlist_len);
				LoadOneItemTidlist(fp, ntidlist_disk_pos, ptemp_tidlist, nitem_tidlist_len);

				if(nprefix_tidlist_len>0)
					ntidlist_len = get_intersection(nprefix_tidlist_len, pprefix_tidlist, nitem_tidlist_len, ptemp_tidlist, ptemp_tidlist);
				else
					ntidlist_len = nitem_tidlist_len;

				if(ntidlist_len==prule->ppat_info->nsupport)
				{
					CopyPatInfo(prule->ppat_info, &ppat_info_array[i]);
					if(fpout!=NULL)
					{
						morule.pattern[prule->npat_len] = i;
						morule.ppat_info = &ppat_info_array[i];
						morule.dpvalue = prule->dpvalue;
						morule.dscore = prule->dscore;
						morule.dcond_pvalue = 1;
						OutputOneRule(fpout, &morule);
					}
				}
				else if(ntidlist_len>0)
				{
					get_pat_info(ptemp_tidlist, ntidlist_len, &ppat_info_array[i]);
					if(fpout!=NULL && ppat_info_array[i].nsupport>0)
					{
						morule.pattern[prule->npat_len] = i;
						morule.ppat_info = &ppat_info_array[i];
						morule.dpvalue = CalcRulePvalue(gndb_size, gptgt_stat_array, ppat_info_array[i].nsupport, ppat_info_array[i].ptgt_stat, morule.dscore, ptemp_tidlist, gptgt_values);
						morule.dcond_pvalue = CalcRulePvalue(prule->ppat_info->nsupport, prule->ppat_info->ptgt_stat, ppat_info_array[i].nsupport, ppat_info_array[i].ptgt_stat, dscore);
						OutputOneRule(fpout, &morule);
					}
				}
			}
		}
	}

	if(btgt_value_loaded)
		DelTransTgtValues();

	if(pprefix_tidlist!=NULL)
		DelIntArray(pprefix_tidlist, nmax_tidlist_len);
	DelIntArray(ptemp_tidlist, nitem_tidlist_capacity);	

	fclose(fp);
	fclose(fpdir);
}


//replace one item in the rule
// szattr_name is the attribute of the item to be replaced. 
//if szattr_name=="*", then consider all items in the rule as candidates to be replaced.
void CTREE_EHTA::AR_ReplaceOneAttr(char* szinput_name, ASSOCRULE *prule, char* szattr_name, PAT_INFO *ppat_info_array, FILE *fpout)
{
	int i, j, k, num_of_sibs, num_of_sibs_covered;
	map<string, int>::iterator map_it;
	char szerrmsg[200];

	if(prule->npat_len==0)
		return;

	for(i=0;i<prule->npat_len;i++)
	{
		mpitem_bitmap[prule->pattern[i]] = 1;
		mpattr_bitmap[gpAttrValues[prule->pattern[i]].nattr_no] = 1;
	}

	if(strcmp(szattr_name, "*")==0)
		mnexplore_attr_no = -1;
	else
	{		
		map_it = gpattr2id_map->find(szattr_name);
		if(map_it==gpattr2id_map->end())
		{
			sprintf(szerrmsg, "Error: cannot find attribute %s", szattr_name);
			LogErrMsg("CTREE_EHTA", "AR_ReplaceOneAttr", szerrmsg);
			mnexplore_attr_no = -2;
		}
		else
		{
			mnexplore_attr_no = map_it->second;
			for(i=0;i<prule->npat_len;i++)
			{
				if(mnexplore_attr_no==gpAttrValues[prule->pattern[i]].nattr_no)
					break;
			}
			if(i==prule->npat_len)
			{
				sprintf(szerrmsg, "Error: attribute %s does not appear in the rule", szattr_name);
				LogErrMsg("CTREE_EHTA", "AR_ReplaceOneAttr", szerrmsg);
				mnexplore_attr_no = -2;
			}
		}
	}

	num_of_sibs = 0;
	if(mnexplore_attr_no==-1)
	{
		for(i=0;i<prule->npat_len;i++)
		{
			k = gpAttrValues[prule->pattern[i]].nattr_no;
			for(j=gpAttributes[k].nstart_item_id;j<gpAttributes[k].nstart_item_id+gpAttributes[k].num_of_values;j++)
			{
				if(mpitem_bitmap[j]==0)
				{
					mpitem_bitmap[j] = 2;
					ppat_info_array[j].npreorder = -1;
					num_of_sibs++;
				}
			}
		}
	}
	else if(mnexplore_attr_no>=0)
	{
		for(j=gpAttributes[mnexplore_attr_no].nstart_item_id;j<gpAttributes[mnexplore_attr_no].nstart_item_id+gpAttributes[mnexplore_attr_no].num_of_values;j++)
		{
			if(mpitem_bitmap[j]==0)
			{
				mpitem_bitmap[j] = 2;
				ppat_info_array[j].npreorder = -1;
				num_of_sibs++;
			}
		}
	}

	if(mnexplore_attr_no>=-1)
	{
		mnpat_len = 0;
		mnum_of_shared_items = 0;

		search_siblings(0, prule->npat_len, ppat_info_array);

		if(fpout!=NULL)
			output_siblings(fpout, prule, mnexplore_attr_no, ppat_info_array);

		//search for infrequent itemsets
		num_of_sibs_covered = 0;
		if(mnexplore_attr_no==-1)
		{
			for(i=0;i<prule->npat_len;i++)
			{
				k = gpAttrValues[prule->pattern[i]].nattr_no;
				for(j=gpAttributes[k].nstart_item_id;j<gpAttributes[k].nstart_item_id+gpAttributes[k].num_of_values;j++)
				{
					if(mpitem_bitmap[j]==2 && ppat_info_array[j].npreorder>=0)
						num_of_sibs_covered++;
				}
			}
		}
		else
		{
			for(j=gpAttributes[mnexplore_attr_no].nstart_item_id;j<gpAttributes[mnexplore_attr_no].nstart_item_id+gpAttributes[mnexplore_attr_no].num_of_values;j++)
			{
				if(mpitem_bitmap[j]==2 && ppat_info_array[j].npreorder>=0)
					num_of_sibs_covered++;
			}
		}

		if(num_of_sibs_covered<num_of_sibs)
			tidlist_siblings(szinput_name, prule, mnexplore_attr_no, ppat_info_array, fpout);
	}

	memset(mpitem_bitmap, 0, gnum_of_items);
	for(i=0;i<prule->npat_len;i++)
		mpattr_bitmap[gpAttrValues[prule->pattern[i]].nattr_no] = 0;
}

// Recursively walks the pattern tree stored in mfpcfp_file, looking for
// "sibling" patterns: the rule (items with mpitem_bitmap==1) with one item
// replaced by another value of the same attribute. Results are stored in
// ppat_info_array under the id of the replacing item.
//   ndisk_pos       - file offset of the node to read
//   num_of_items    - number of rule items still to be matched below this node
//   ppat_info_array - indexed by item id; receives the statistics
// mnexplore_attr_no restricts the replaced attribute (-1 = any rule attribute).
// mpattr_bitmap marks the attributes used by the rule. The member buffers
// mpentry_buf / mptgt_stat_buf and mppattern are used as a stack.
// NOTE(review): fread results are not checked; a truncated file goes unnoticed.
void CTREE_EHTA::search_siblings(int ndisk_pos, int num_of_items, PAT_INFO *ppat_info_array)
{
	int nfile_pos, num_of_entries;
	ENTRY *pentries;
	char *ptgt_stat_array;
	int *pitems, num_of_matched, i, j, nlast_index, num_of_contained, nattr_no, nstart_pos, nend_pos;

	// Move to the node with a relative seek and read its entry count.
	nfile_pos = ftell(mfpcfp_file);
	if(ndisk_pos!=nfile_pos)
		fseek(mfpcfp_file, ndisk_pos-nfile_pos, SEEK_CUR);
	fread(&num_of_entries, sizeof(int), 1, mfpcfp_file);

	pentries = &mpentry_buf[mndfs_entry_buf_pos];
	ptgt_stat_array = &mptgt_stat_buf[mndfs_entry_buf_pos*gntgt_stat_size];

	// A count of 1, or a negative count below -1, is read as a single entry
	// that carries |num_of_entries| items.
	if(num_of_entries==1 || num_of_entries<-1)
	{
		if(num_of_entries<-1)
			num_of_entries = -num_of_entries;

		// The buffer slot is not reserved here (mndfs_entry_buf_pos is left
		// unchanged); pentries is not read again after the recursive call below.
		pitems = &mppattern[mnpat_len];
		fread(pentries, sizeof(ENTRY)-sizeof(int), 1, mfpcfp_file);
		fread(pitems, sizeof(int), num_of_entries, mfpcfp_file);
		fread(ptgt_stat_array, sizeof(char), gntgt_stat_size, mfpcfp_file);

		num_of_matched = 0;
		for(i=0;i<num_of_entries;i++)
		{
			if(mpitem_bitmap[pitems[i]]==1)
			{
				mppattern[mnpat_len++] = pitems[i];
				num_of_matched++;
				// Every other value of this item's attribute is recorded with
				// this entry's preorder and a support of 0.
				// NOTE(review): presumably because values of one attribute are
				// mutually exclusive - confirm.
				nattr_no = gpAttrValues[pitems[i]].nattr_no;
				for(j=gpAttributes[nattr_no].nstart_item_id;j<gpAttributes[nattr_no].nstart_item_id+gpAttributes[nattr_no].num_of_values;j++)
				{
					if(j!=pitems[i])
					{
						ppat_info_array[j].npreorder = pentries->npreorder;
						ppat_info_array[j].nsupport = 0;
					}
				}
			}
		}

		if(num_of_items-num_of_matched>0 && pentries->child!=0)
			search_siblings(pentries->child, num_of_items-num_of_matched, ppat_info_array);

		mnpat_len -= num_of_matched;
	}
	else if(num_of_entries>1)
	{
		// Node with several entries, one item each; reserve their buffer slots.
		mndfs_entry_buf_pos += num_of_entries;
		fread(pentries, sizeof(ENTRY), num_of_entries, mfpcfp_file);
		fread(ptgt_stat_array, sizeof(char), num_of_entries*gntgt_stat_size, mfpcfp_file);

		// Count the rule items among the entries and remember the last one.
		nlast_index = -1;
		num_of_contained = 0;
		for(i=0;i<num_of_entries;i++)
		{
			if(mpitem_bitmap[pentries[i].item]==1)
			{
				nlast_index = i;
				num_of_contained++;
			}
		}
		if(num_of_contained==num_of_items)
		{
			// All remaining rule items appear in this node.
			if(mnexplore_attr_no==-1 || gpAttrValues[pentries[nlast_index].item].nattr_no==mnexplore_attr_no)
			{
				// [nstart_pos, nend_pos) is the run of entries that share the
				// attribute of the last rule item, i.e. that item and its siblings.
				nattr_no = gpAttrValues[pentries[nlast_index].item].nattr_no;
				nstart_pos = nlast_index;
				while(nstart_pos>=0 && gpAttrValues[pentries[nstart_pos].item].nattr_no==nattr_no)
					nstart_pos--;
				nstart_pos++;
				nend_pos = nlast_index+1;
				while(nend_pos<num_of_entries && gpAttrValues[pentries[nend_pos].item].nattr_no==nattr_no)
					nend_pos++;
				if(num_of_items==1)
				{
					// Only one item left: each sibling entry is itself a result.
					for(i=nstart_pos;i<nend_pos;i++)
					{
						if(i!=nlast_index)
							CopyPatInfo(pentries[i].npreorder, pentries[i].support, &ptgt_stat_array[i*gntgt_stat_size], &ppat_info_array[pentries[i].item]);
					}
				}
				else if(num_of_items>1)
				{
					for(i=nstart_pos;i<nend_pos;i++)
					{
						if(pentries[i].child!=0)
						{
							mppattern[mnpat_len++] = pentries[i].item;
							if(i==nlast_index)
							{
								// Keep the rule item; a replacement may still
								// happen deeper, but only in "any attribute" mode.
								if(mnexplore_attr_no==-1)
									search_siblings(pentries[i].child, num_of_items-1, ppat_info_array);
							}
							else
								// A sibling replaces the rule item here; the
								// remaining items are looked up with search_exact.
								search_exact(pentries[i].child, num_of_items-1, &ppat_info_array[pentries[i].item]);
							mnpat_len--;
						}
					}
				}
			}
			else if(num_of_items>1)
			{
				// The last rule item is not of the explored attribute: just descend.
				if(pentries[nlast_index].child!=0)
				{
					mppattern[mnpat_len++] = pentries[nlast_index].item;
					search_siblings(pentries[nlast_index].child, num_of_items-1, ppat_info_array);
					mnpat_len--;
				}
			}
		}
		else if(num_of_contained==num_of_items-1)
		{
			// Exactly one remaining rule item is missing from this node. Skip
			// past the attribute run of the last rule item found (or of entry
			// 0 when none was found), then advance to the next entry whose
			// attribute is used by the rule.
			// NOTE(review): presumably this run holds the siblings of the
			// missing item - confirm against the tree's item order.
			if(nlast_index==-1)
				nlast_index = 0;
			nattr_no = gpAttrValues[pentries[nlast_index].item].nattr_no;
			i = nlast_index+1;
			while(i<num_of_entries && gpAttrValues[pentries[i].item].nattr_no==nattr_no)
				i++;
			while(i<num_of_entries && mpattr_bitmap[gpAttrValues[pentries[i].item].nattr_no]==0)
				i++;
			if(i<num_of_entries)
			{
				nattr_no = gpAttrValues[pentries[i].item].nattr_no;
				if(nattr_no==mnexplore_attr_no || mnexplore_attr_no==-1)
				{
					// Process every entry of that attribute run.
					while(i<num_of_entries && gpAttrValues[pentries[i].item].nattr_no==nattr_no)
					{
						if(num_of_items==1)
							CopyPatInfo(pentries[i].npreorder, pentries[i].support, &ptgt_stat_array[i*gntgt_stat_size], &ppat_info_array[pentries[i].item]);
						else if(pentries[i].child!=0)
						{
							mppattern[mnpat_len++] = pentries[i].item;
							search_exact(pentries[i].child, num_of_items-1, &ppat_info_array[pentries[i].item]);
							mnpat_len--;
						}
						i++;
					}
				}
			}

		}
		mndfs_entry_buf_pos -= num_of_entries;
	}
}


void CTREE_EHTA::output_siblings(FILE *fpout, ASSOCRULE *prule, int nexplore_attr_no, PAT_INFO *ppat_info_array)
{
	int i, j, k, nitem, nattr_no;
	DIFF_ITEM *pdiff_item1, *pdiff_item2;
	double dscore;

	morule.npat_len = prule->npat_len;

	pdiff_item1 = &mpdiff_items[0];
	pdiff_item2 = &mpdiff_items[1];

	if(nexplore_attr_no>=0)
	{
		nitem = -1;
		j = 0;
		for(i=0;i<prule->npat_len;i++)
		{
			if(gpAttrValues[prule->pattern[i]].nattr_no!=nexplore_attr_no)
				morule.pattern[j++] = prule->pattern[i];
			else 
				nitem = prule->pattern[i];
		}
		for(i=gpAttributes[nexplore_attr_no].nstart_item_id;i<gpAttributes[nexplore_attr_no].nstart_item_id+gpAttributes[nexplore_attr_no].num_of_values;i++)
		{
			if(i!=nitem && ppat_info_array[i].npreorder>=0 && ppat_info_array[i].nsupport>0)
			{
				morule.pattern[prule->npat_len-1] = i;
				CopyDiffItem(prule->ppat_info, pdiff_item1);
				CopyDiffItem(&ppat_info_array[i], pdiff_item2);
				morule.dcond_pvalue = CalcPvalue(pdiff_item1, pdiff_item2, dscore, 0);
				output_one_freq_rule(fpout, prule, &ppat_info_array[i]);
			}
		}
	}
	else
	{

		memcpy(morule.pattern, prule->pattern, morule.npat_len*sizeof(int));

		for(i=prule->npat_len-1;i>=0;i--)
		{
			for(j=i+1;j<prule->npat_len;j++)
				morule.pattern[j-1] = prule->pattern[j];

			nitem = prule->pattern[i];
			nattr_no = gpAttrValues[nitem].nattr_no;
			for(k=gpAttributes[nattr_no].nstart_item_id;k<gpAttributes[nattr_no].nstart_item_id+gpAttributes[nattr_no].num_of_values;k++)
			{
				if(k!=nitem && ppat_info_array[k].npreorder>=0 && ppat_info_array[k].nsupport>0)
				{
					morule.pattern[prule->npat_len-1] = k;
					CopyDiffItem(prule->ppat_info, pdiff_item1);
					CopyDiffItem(&ppat_info_array[k], pdiff_item2);
					morule.dcond_pvalue = CalcPvalue(pdiff_item1, pdiff_item2, dscore, 0);
					output_one_freq_rule(fpout, prule, &ppat_info_array[k]);
				}
			}
		}
	}
}

void CTREE_EHTA::tidlist_siblings(char* szinput_name, ASSOCRULE *prule, int nexplore_attr_no, PAT_INFO *ppat_info_array, FILE *fpout)
{
	FILE *fp, *fpdir;
	char sztidlist_filename[200], szerrmsg[200];	
	TIDLIST_NODE *pnodes;
	int i, j, k;
	int ntidlist_len1, nmax_tidlist_len, *ptemp_tidlist1, nresult_len, *presult_tidlist, nitem, nattr_no, nstart_pos;
	int ntidlist_disk_pos, nitem_tidlist_len, *pitem_tidlist, nitem_tidlist_capacity, *ptemp_tidlist2, ntidlist_len2;
	int nrule_tidlist_len, *prule_tidlist;
	bool btgt_value_loaded;
	double dscore;

	sprintf(sztidlist_filename, "%s.item.tidlist", szinput_name);
	fp = fopen(sztidlist_filename, "rb");
	if(fp==NULL)
	{
		sprintf(szerrmsg, "Error: cannot open file %s for read.", sztidlist_filename);
		LogErrMsg("", "LoadOneItemTidlist", szerrmsg);
		return;
	}
	sprintf(sztidlist_filename, "%s.item.tidlist.dir", szinput_name);
	fpdir = fopen(sztidlist_filename, "rb");
	if(fpdir==NULL)
	{
		sprintf(szerrmsg, "Error: cannot open file %s for read.", sztidlist_filename);
		LogErrMsg("", "LoadOneItemTidlist", szerrmsg);
		return;
	}

	pnodes = new TIDLIST_NODE[prule->npat_len];
	IncMemSize(sizeof(TIDLIST_NODE)*prule->npat_len);
	memset(pnodes, 0, sizeof(TIDLIST_NODE)*prule->npat_len);

	for(i=0;i<prule->npat_len;i++)
		pnodes[i].nitem = prule->pattern[i];

	qsort(pnodes, prule->npat_len, sizeof(TIDLIST_NODE), comp_tidlist_nodes_item);
	for(i=0;i<prule->npat_len;i++)
		pnodes[i].ndisk_pos = LoadOneItemListPos(fpdir, pnodes[i].nitem, pnodes[i].nitem_tidlist_len);

	nstart_pos = 0;
	for(i=0;i<prule->npat_len;i++)
	{
		if(pnodes[i].nitem_tidlist_len==gndb_size)
		{
			pnodes[i].nstatus = 2;
			nstart_pos++;
		}
		else if(nexplore_attr_no>=0 && gpAttrValues[pnodes[i].nitem].nattr_no!=nexplore_attr_no)
			pnodes[i].nstatus = 1;
	}

	if(nstart_pos<prule->npat_len)
	{
		if(gptgt_values==NULL)
		{
			btgt_value_loaded = true;
			LoadTransTgtValues(szinput_name);
		}
		else
			btgt_value_loaded = false;

		qsort(pnodes, prule->npat_len, sizeof(TIDLIST_NODE), comp_tidlist_nodes_status);
		nmax_tidlist_len = 0;
		for(i=0;i<prule->npat_len;i++)
		{
			morule.pattern[i] = pnodes[i].nitem;
			if(nmax_tidlist_len<pnodes[i].nitem_tidlist_len && pnodes[i].nitem_tidlist_len<gndb_size)
				nmax_tidlist_len = pnodes[i].nitem_tidlist_len;
		}

		nitem_tidlist_capacity = nmax_tidlist_len;
		pitem_tidlist = NewIntArray(nitem_tidlist_capacity);
		ptemp_tidlist1 = NewIntArray(nmax_tidlist_len);
		ptemp_tidlist2 = NewIntArray(nmax_tidlist_len);
		presult_tidlist = NewIntArray(nmax_tidlist_len);
		prule_tidlist = NewIntArray(nmax_tidlist_len);

		ntidlist_len1 = -1;
		for(i=nstart_pos;i<prule->npat_len-1;i++)
		{
			pnodes[i].pitem_tid_list = NewIntArray(pnodes[i].nitem_tidlist_len);
			LoadOneItemTidlist(fp, pnodes[i].ndisk_pos, pnodes[i].pitem_tid_list, pnodes[i].nitem_tidlist_len);
			if(i>nstart_pos)
			{
				if(ntidlist_len1==prule->ppat_info->nsupport)
					pnodes[i].ntidlist_len = ntidlist_len1; 
				else
				{
					ntidlist_len1 = get_intersection(ntidlist_len1, ptemp_tidlist1, pnodes[i].nitem_tidlist_len, pnodes[i].pitem_tid_list, ptemp_tidlist1);
					pnodes[i].ptid_list = NewIntArray(ntidlist_len1);
					memcpy(pnodes[i].ptid_list, ptemp_tidlist1, sizeof(int)*ntidlist_len1);
					pnodes[i].ntidlist_len = ntidlist_len1;
				}
			}
			else 
			{
				ntidlist_len1 = pnodes[i].nitem_tidlist_len;
				memcpy(ptemp_tidlist1, pnodes[i].pitem_tid_list, sizeof(int)*pnodes[i].nitem_tidlist_len);
				pnodes[i].ntidlist_len = ntidlist_len1;
				pnodes[i].ptid_list = NewIntArray(ntidlist_len1);
				memcpy(pnodes[i].ptid_list, ptemp_tidlist1, sizeof(int)*ntidlist_len1);
			}	
		}

		morule.npat_len = prule->npat_len;
		nitem = pnodes[prule->npat_len-1].nitem;
		nattr_no = gpAttrValues[nitem].nattr_no;
		if(nexplore_attr_no>=0 && nattr_no!=nexplore_attr_no)
			LogErrMsg("CTREE_EHTA", "tidlist_siblings", "Error: inconsistent attribute no.");

		CopyDiffItem(prule->ppat_info, &mpdiff_items[0]);
		mpdiff_items[0].ptid_list = prule_tidlist;
		if(gntgt_attr_type==CONTINUOUS)
		{
			ntidlist_disk_pos = LoadOneItemListPos(fpdir, nitem, nitem_tidlist_len);
			LoadOneItemTidlist(fp, ntidlist_disk_pos, pitem_tidlist, nitem_tidlist_len);
			if(ntidlist_len1>0)
				nrule_tidlist_len = get_intersection(ntidlist_len1, ptemp_tidlist1, nitem_tidlist_len, pitem_tidlist, prule_tidlist);
			else
			{
				memcpy(prule_tidlist, pitem_tidlist, sizeof(int)*nitem_tidlist_len);
				nrule_tidlist_len = nitem_tidlist_len;
			}
			if(nrule_tidlist_len!=prule->ppat_info->nsupport)
				LogErrMsg("CTREE_EHTA", "tidlist_siblings", "Error: inconsistent rule support.");
		}
		
		if(ntidlist_len1>0 && ntidlist_len1==prule->ppat_info->nsupport)
		{
			for(i=gpAttributes[nattr_no].nstart_item_id;i<gpAttributes[nattr_no].nstart_item_id+gpAttributes[nattr_no].num_of_values;i++)
			{
				if(i!=nitem && ppat_info_array[i].npreorder==-1)
					ppat_info_array[i].nsupport = 0;
			}
		}
		else
		{
			for(i=gpAttributes[nattr_no].nstart_item_id;i<gpAttributes[nattr_no].nstart_item_id+gpAttributes[nattr_no].num_of_values;i++)
			{
				if(i!=nitem && ppat_info_array[i].npreorder==-1)
				{
					ntidlist_disk_pos = LoadOneItemListPos(fpdir, i, nitem_tidlist_len);

					if(nitem_tidlist_len>0)
					{
						if(nitem_tidlist_len>nitem_tidlist_capacity)
							nitem_tidlist_capacity = ResizeArray(pitem_tidlist, nitem_tidlist_capacity, nitem_tidlist_len);
						LoadOneItemTidlist(fp, ntidlist_disk_pos, pitem_tidlist, nitem_tidlist_len);
						if(ntidlist_len1>0)
						{
							nresult_len = get_intersection(ntidlist_len1, ptemp_tidlist1, nitem_tidlist_len, pitem_tidlist, presult_tidlist);
							if(nresult_len>0)
							{
								get_pat_info(presult_tidlist, nresult_len, &ppat_info_array[i]);
								if(fpout!=NULL)
								{
									morule.pattern[prule->npat_len-1] = i;
									morule.ppat_info = &ppat_info_array[i];
									morule.dpvalue = CalcRulePvalue(gndb_size, gptgt_stat_array, ppat_info_array[i].nsupport, ppat_info_array[i].ptgt_stat, morule.dscore, presult_tidlist, gptgt_values);
									
									CopyDiffItem(&ppat_info_array[i], &mpdiff_items[1]);
									mpdiff_items[1].ptid_list = presult_tidlist;
									morule.dcond_pvalue = CalcPvalue(&mpdiff_items[0], &mpdiff_items[1], dscore, 0);
									
									OutputOneRule(fpout, &morule);
								}
							}
						}
						else if(nitem_tidlist_len>0)
						{
							get_pat_info(pitem_tidlist, nitem_tidlist_len, &ppat_info_array[i]);
							if(fpout!=NULL)
							{
								morule.pattern[prule->npat_len-1] = i;
								morule.ppat_info = &ppat_info_array[i];
								morule.dpvalue = CalcRulePvalue(gndb_size, gptgt_stat_array, ppat_info_array[i].nsupport, ppat_info_array[i].ptgt_stat, morule.dscore, pitem_tidlist, gptgt_values);

								CopyDiffItem(&ppat_info_array[i], &mpdiff_items[1]);
								mpdiff_items[1].ptid_list = pitem_tidlist;
								morule.dcond_pvalue = CalcPvalue(&mpdiff_items[0], &mpdiff_items[1], dscore, 0);
								
								OutputOneRule(fpout, &morule);
							}
						}
					}
				}
			}
		}

		if(nexplore_attr_no==-1)
		{
			ntidlist_len1 = pnodes[prule->npat_len-1].nitem_tidlist_len;
			LoadOneItemTidlist(fp, pnodes[prule->npat_len-1].ndisk_pos, ptemp_tidlist1, ntidlist_len1);
			for(i=prule->npat_len-2;i>=nstart_pos;i--)
			{
				nitem = pnodes[i].nitem;
				nattr_no = gpAttrValues[nitem].nattr_no;
				if(ntidlist_len1==prule->ppat_info->nsupport || i>nstart_pos && pnodes[i-1].ntidlist_len==prule->ppat_info->nsupport)
				{
					for(k=gpAttributes[nattr_no].nstart_item_id;k<gpAttributes[nattr_no].nstart_item_id+gpAttributes[nattr_no].num_of_values;k++)
					{
						if(k!=nitem && ppat_info_array[k].npreorder==-1)
							ppat_info_array[k].nsupport = 0;
					}
				}
				else
				{
					if(i>nstart_pos)
						ntidlist_len2 = get_intersection(ntidlist_len1, ptemp_tidlist1, pnodes[i-1].ntidlist_len, pnodes[i-1].ptid_list, ptemp_tidlist2);
					else
					{
						ntidlist_len2 = ntidlist_len1;
						memcpy(ptemp_tidlist2, ptemp_tidlist1, sizeof(int)*ntidlist_len1);
					}

					for(k=gpAttributes[nattr_no].nstart_item_id;k<gpAttributes[nattr_no].nstart_item_id+gpAttributes[nattr_no].num_of_values;k++)
					{
						if(k!=nitem && ppat_info_array[k].npreorder==-1)
						{
							ntidlist_disk_pos = LoadOneItemListPos(fpdir, k, nitem_tidlist_len);

							if(nitem_tidlist_len>0)
							{
								if(nitem_tidlist_len>nitem_tidlist_capacity)
									nitem_tidlist_capacity = ResizeArray(pitem_tidlist, nitem_tidlist_capacity, nitem_tidlist_len);
								LoadOneItemTidlist(fp, ntidlist_disk_pos, pitem_tidlist, nitem_tidlist_len);

								nresult_len = get_intersection(ntidlist_len2, ptemp_tidlist2, nitem_tidlist_len, pitem_tidlist, presult_tidlist);

								if(nresult_len>0)
								{
									get_pat_info(presult_tidlist, nresult_len, &ppat_info_array[k]);
									if(fpout!=NULL)
									{
										for(j=i+1;j<prule->npat_len;j++)
											morule.pattern[j-1] = pnodes[j].nitem;
										morule.pattern[prule->npat_len-1] = k;
										morule.ppat_info = &ppat_info_array[k];
										morule.dpvalue = CalcRulePvalue(gndb_size, gptgt_stat_array, ppat_info_array[k].nsupport, ppat_info_array[k].ptgt_stat, morule.dscore, presult_tidlist, gptgt_values);

										CopyDiffItem(&ppat_info_array[k], &mpdiff_items[1]);
										mpdiff_items[1].ptid_list = presult_tidlist;
										morule.dcond_pvalue = CalcPvalue(&mpdiff_items[0], &mpdiff_items[1], dscore, 0);

										OutputOneRule(fpout, &morule);
									}
								}
							}
						}
					}
				}
				if(i>nstart_pos && ntidlist_len1>prule->ppat_info->nsupport)
					ntidlist_len1 = get_intersection(ntidlist_len1, ptemp_tidlist1, pnodes[i].nitem_tidlist_len, pnodes[i].pitem_tid_list, ptemp_tidlist1);
			}
		}

		DelIntArray(pitem_tidlist, nitem_tidlist_capacity);
		DelIntArray(ptemp_tidlist1, nmax_tidlist_len);
		DelIntArray(ptemp_tidlist2, nmax_tidlist_len);
		DelIntArray(presult_tidlist, nmax_tidlist_len);
		DelIntArray(prule_tidlist, nmax_tidlist_len);

		if(btgt_value_loaded)
			DelTransTgtValues();

		for(i=nstart_pos;i<prule->npat_len;i++)
		{
			if(pnodes[i].pitem_tid_list!=NULL)
				DelIntArray(pnodes[i].pitem_tid_list, pnodes[i].nitem_tidlist_len);
			if(pnodes[i].ptid_list!=NULL)
				DelIntArray(pnodes[i].ptid_list, pnodes[i].ntidlist_len);
		}
	}
	
	delete []pnodes;
	DecMemSize(sizeof(TIDLIST_NODE)*prule->npat_len);

	fclose(fp);
	fclose(fpdir);
}



// Output the rules selected by an attribute filter file and an item filter file.
// Both bitmaps are cleared, filled from the two files, cross-checked, and then
// the tree is searched from disk position 0. The number of attributes flagged
// INCLUDE is handed to search_ar as the count of items still to be matched.
// Both bitmaps are cleared again before returning.
void CTREE_EHTA::AR_FilterByAttrsNItems(char* szattr_file, char* szitem_file)
{
	int nattr, nincluded_attrs;

	// attribute-level filter
	memset(mpattr_bitmap, 0, gnum_of_attrs);
	LoadFilterConds(szattr_file, gpattr2id_map, mpattr_bitmap, gnum_of_attrs);

	// item-level filter
	memset(mpitem_bitmap, 0, gnum_of_items);
	LoadFilterConds(szitem_file, gpattrvalue2item_map, mpitem_bitmap, gnum_of_items);

	CheckAttrNValueStatus(mpattr_bitmap, mpitem_bitmap);

	// count the attributes that must appear in every reported pattern
	nincluded_attrs = 0;
	nattr = 0;
	while(nattr<gnum_of_attrs)
	{
		if(mpattr_bitmap[nattr]==INCLUDE)
			nincluded_attrs++;
		nattr++;
	}

	// start the search with an empty pattern and no shared items
	mnum_of_shared_items = 0;
	mnpat_len = 0;
	search_ar(0, nincluded_attrs);

	// leave the bitmaps clean for the next user
	memset(mpattr_bitmap, 0, gnum_of_attrs);
	memset(mpitem_bitmap, 0, gnum_of_items);
}


void CTREE_EHTA::search_ar(int ndisk_pos, int num_of_items)
{
	int nfile_pos, num_of_entries;
	ENTRY *pentries;
	char *ptgt_stat_array;
	int *pitems, num_of_matched, norig_shared_item_num, nlast_index, i, num_of_contained, nattr_no;

	nfile_pos = ftell(mfpcfp_file);
	if(ndisk_pos!=nfile_pos)
		fseek(mfpcfp_file, ndisk_pos-nfile_pos, SEEK_CUR);
	fread(&num_of_entries, sizeof(int), 1, mfpcfp_file);

	pentries = &mpentry_buf[mndfs_entry_buf_pos];
	ptgt_stat_array = &mptgt_stat_buf[mndfs_entry_buf_pos*gntgt_stat_size];

	if(num_of_entries==1 || num_of_entries<-1)
	{
		if(num_of_entries<-1)
			num_of_entries = -num_of_entries;

		pitems = &mppattern[mnpat_len];
		fread(pentries, sizeof(ENTRY)-sizeof(int), 1, mfpcfp_file);
		fread(pitems, sizeof(int), num_of_entries, mfpcfp_file);
		fread(ptgt_stat_array, sizeof(char), gntgt_stat_size, mfpcfp_file);
		mndfs_entry_buf_pos++;

		norig_shared_item_num = 0;
		num_of_matched = 0;
		for(i=0;i<num_of_entries;i++)
		{
			if(mpitem_bitmap[pitems[i]]!=EXCLUDE)
			{
				if(mpitem_bitmap[pitems[i]]==INCLUDE || mpattr_bitmap[gpAttrValues[pitems[i]].nattr_no]==INCLUDE)
				{
					mppattern[mnpat_len++] = pitems[i];
					num_of_matched++;
				}
				else //if(mpitem_bitmap[pitems[i]]==OPTIONAL)
					mpshared_items[mnum_of_shared_items++] = pitems[i];
			}
		}

		if(num_of_matched==num_of_items)
			output_rules(mfptext, mnpat_len, mnum_of_shared_items, pentries, ptgt_stat_array);
		if(pentries->child!=0)
			search_ar(pentries->child, num_of_items-num_of_matched);

		mnpat_len -= num_of_matched;
		mnum_of_shared_items = norig_shared_item_num;
		mndfs_entry_buf_pos--;
	}
	else if(num_of_entries>1)
	{
		mndfs_entry_buf_pos += num_of_entries;
		fread(pentries, sizeof(ENTRY), num_of_entries, mfpcfp_file);
		fread(ptgt_stat_array, sizeof(char), num_of_entries*gntgt_stat_size, mfpcfp_file);

		nlast_index = -1;
		num_of_contained = 0;
		if(num_of_items>0)
		{
			i = 0;
			while(i<num_of_entries)
			{
				nattr_no = gpAttrValues[pentries[i].item].nattr_no;
				if(mpattr_bitmap[nattr_no]==INCLUDE)
				{
					nlast_index = i;
					num_of_contained++;
				}
				i++;
				while(i<num_of_entries && gpAttrValues[pentries[i].item].nattr_no==nattr_no)
					i++;
			}
			if(num_of_contained==num_of_items)
			{
				for(i=nlast_index;i<num_of_entries;i++)
				{
					if(mpitem_bitmap[pentries[i].item]!=EXCLUDE)
					{
						mppattern[mnpat_len++] = pentries[i].item;
						if(mpitem_bitmap[pentries[i].item]==INCLUDE || mpattr_bitmap[gpAttrValues[pentries[i].item].nattr_no]==INCLUDE)
						{
							if(num_of_items==1)
								output_rules(mfptext, mnpat_len, mnum_of_shared_items, &pentries[i], &ptgt_stat_array[i*gntgt_stat_size]);
							if(pentries[i].child!=0)
								search_ar(pentries[i].child, num_of_items-1);
						}
						else
						{
							if(pentries[i].child!=0)
								search_ar(pentries[i].child, num_of_items);
						}
						mnpat_len--;
					}
				}
			}
		}
		else 
		{
			for(i=0;i<num_of_entries;i++)
			{
				if(mpitem_bitmap[pentries[i].item]==OPTIONAL)
				{
					mppattern[mnpat_len++] = pentries[i].item;
					output_rules(mfptext, mnpat_len, mnum_of_shared_items, &pentries[i], &ptgt_stat_array[i*gntgt_stat_size]);
					if(pentries[i].child!=0)
						search_ar(pentries[i].child, 0);
					mnpat_len--;
				}
			}
		}
		mndfs_entry_buf_pos -= num_of_entries;
	}
}

// Emit one rule for the tree entry pentry.
// The pattern statistics are copied into mopat_info, the rule p-value is
// computed, the shared items are appended after the first npat_len pattern
// items of morule, and the rule is written to fp.
void CTREE_EHTA::output_rules(FILE *fp, int npat_len, int num_of_shared_items, ENTRY *pentry, char* ptgt_stat)
{
	int nshared;

	CopyPatInfo(pentry->npreorder, pentry->support, ptgt_stat, &mopat_info);

	// the tid list is only fetched for a continuous target below the CLT threshold
	if(gntgt_attr_type==CONTINUOUS)
	{
		if(gnmin_sup<CLT_THRESHOLD)
			LoadOneTidList(mfp_tidlist_dir, mfp_tidlist, pentry->npreorder, mptid_list, pentry->support);
	}

	morule.dpvalue = CalcRulePvalue(gndb_size, gptgt_stat_array, pentry->support, ptgt_stat, morule.dscore, mptid_list, gptgt_values);

	// pattern = matched items followed by the shared items
	morule.npat_len = npat_len;
	nshared = 0;
	while(nshared<num_of_shared_items)
	{
		morule.pattern[morule.npat_len] = mpshared_items[nshared];
		morule.npat_len++;
		nshared++;
	}

	OutputOneRule(fp, &morule);
}

/*
void CTREE_EHTA::FilterRules(char* szcfptree_name, char* szoutput_filename)
{
	char szattrvalue_filename[200];
	struct timeb start, end;
	double drun_time;


	ftime(&start);

	gdused_mem_size = 0;
	gdmax_used_mem_size = 0;

	mfptext = fopen(szoutput_filename, "wt");
	if(mfptext==NULL)
	{
		printf("Error: cannot open file %s for write\n", szoutput_filename);
		return;
	}

	QueryInitialization(szcfptree_name);

	//GenFilterConds("attr.filter.txt", 2, "item.filter.txt");

	AR_FilterByAttrsNItems("attr.filter.txt", "item.filter.txt");

	fclose(mfptext);

	sprintf(szattrvalue_filename, "%s.attrvalue2item.txt", szcfptree_name);
	ConvertRules(szoutput_filename, szattrvalue_filename, "rule.raw.txt");

	QueryTermination();

	ftime(&end);
	drun_time = end.time-start.time+(double)(end.millitm-start.millitm)/1000;

	printf("Time for filtering: %.3f\n", drun_time);
}
*/

// Fill ppat_info with the support and target statistics of the transactions
// listed in ptidlist[0..ntidlist_len).
//
//   continuous target : ptgt_stat holds a TGT_SUM (sum and sum of squares);
//                       dmean and the sample standard deviation are set
//   single target value (gsztarget_value non-empty):
//                       ptgt_stat holds the number of transactions whose
//                       target is 1; dmean is that fraction
//   otherwise         : ptgt_stat holds one counter per target value
//
// An empty tid list yields dmean = 0 instead of a 0/0 division.
void get_pat_info(int *ptidlist, int ntidlist_len, PAT_INFO *ppat_info)
{
	int i;

	ppat_info->nsupport = ntidlist_len;

	if(gntgt_attr_type==CONTINUOUS || gntgt_attr_type==CONTINUOUS_NORMAL)
	{
		TGT_SUM *ptgt_sum;
		double dvariance;

		ptgt_sum = (TGT_SUM*)ppat_info->ptgt_stat;
		ptgt_sum->dsum = 0;
		ptgt_sum->dsquare_sum = 0;

		for(i=0;i<ntidlist_len;i++)
		{
			ptgt_sum->dsum += gptgt_values[ptidlist[i]];
			ptgt_sum->dsquare_sum += gptgt_values[ptidlist[i]]*gptgt_values[ptidlist[i]];
		}
		if(ntidlist_len>0)
			ppat_info->dmean = ptgt_sum->dsum/ntidlist_len;
		else
			ppat_info->dmean = 0;
		if(ppat_info->nsupport>1)
		{
			// sum(x^2) - sum(x)*mean can come out slightly negative through
			// rounding when the values are (nearly) identical; clamp so that
			// sqrt() does not return NaN
			dvariance = (ptgt_sum->dsquare_sum-ptgt_sum->dsum*ppat_info->dmean)/(ppat_info->nsupport-1);
			if(dvariance<0)
				dvariance = 0;
			ppat_info->dstd_dev = sqrt(dvariance);
		}
		else
			ppat_info->dstd_dev = 0;
	}
	else if(gsztarget_value[0]!=0)
	{
		int nsup;

		nsup = 0;
		for(i=0;i<ntidlist_len;i++)
		{
			if(gpint_tgt_values[ptidlist[i]]==1)
				nsup++;
		}
		// ptgt_stat is a raw byte buffer, hence the memcpy
		memcpy(ppat_info->ptgt_stat, &nsup, sizeof(int));
		if(ntidlist_len>0)
			ppat_info->dmean = (double)nsup/ntidlist_len;
		else
			ppat_info->dmean = 0;
	}
	else
	{
		int *psup_array;

		psup_array = (int*)ppat_info->ptgt_stat;
		memset(psup_array, 0, sizeof(int)*gnum_of_tgt_values);
		for(i=0;i<ntidlist_len;i++)
			psup_array[gpint_tgt_values[ptidlist[i]]]++;
	}
}



// Search the tree for the immediate subsets of prule's pattern.
// Every pattern item gets the EXPLORE_IN_PAT bit in mpitem_bitmap and its
// slot in mpcontext_rs_items is marked with npreorder = -1 before the search;
// the bitmap slots of the pattern items are zeroed afterwards.
// nrule_no is not used here.
void CTREE_EHTA::SearchParents(int nrule_no, ASSOCRULE *prule)
{
	int npos, nitem;

	npos = 0;
	while(npos<prule->npat_len)
	{
		nitem = prule->pattern[npos];
		mpitem_bitmap[nitem] |= EXPLORE_IN_PAT;
		mpcontext_rs_items[nitem].npreorder = -1;
		npos++;
	}

	// start from an empty pattern
	mnum_of_shared_items = 0;
	mnpat_len = 0;

	search_immd_subsets(0, prule->npat_len, -1, mpcontext_rs_items);

	// clear the marks set above
	npos = 0;
	while(npos<prule->npat_len)
	{
		mpitem_bitmap[prule->pattern[npos]] = 0;
		npos++;
	}
}



// Search the tree for the immediate supersets of prule's pattern.
// mpitem_bitmap is set to 1 for the pattern items and to 2 for every item
// whose slot is still 0; the whole bitmap is cleared after the search.
// nrule_no is not used here.
void CTREE_EHTA::SearchChildren(int nrule_no, ASSOCRULE *prule)
{
	int npos, nitem;

	// items of the rule itself
	npos = 0;
	while(npos<prule->npat_len)
	{
		mpitem_bitmap[prule->pattern[npos]] = 1;
		npos++;
	}

	// every slot that is still unmarked
	nitem = 0;
	while(nitem<gnum_of_items)
	{
		if(mpitem_bitmap[nitem]==0)
			mpitem_bitmap[nitem] = 2;
		nitem++;
	}

	mnum_of_shared_items = 0;
	mnpat_len = 0;

	search_immd_supersets(0, prule->npat_len, -1, mpcontext_rs_items);

	memset(mpitem_bitmap, 0, gnum_of_items);

}


// Search the tree for the siblings of prule's pattern.
// Nothing is done for an empty pattern. Otherwise mpitem_bitmap is set to 1
// for the pattern items and to 2 for every item whose slot is still 0, the
// search is run, and the whole bitmap is cleared (rather than only the
// pattern items' slots).
// nrule_no is not used here.
void CTREE_EHTA::SearchSiblings(int nrule_no, ASSOCRULE *prule)
{
	int npos, nitem;

	if(prule->npat_len!=0)
	{
		// items of the rule itself
		npos = 0;
		while(npos<prule->npat_len)
		{
			mpitem_bitmap[prule->pattern[npos]] = 1;
			npos++;
		}

		// every slot that is still unmarked
		nitem = 0;
		while(nitem<gnum_of_items)
		{
			if(mpitem_bitmap[nitem]==0)
				mpitem_bitmap[nitem] = 2;
			nitem++;
		}

		mnum_of_shared_items = 0;
		mnpat_len = 0;

		search_siblings(0, prule->npat_len, mpcontext_rs_items);

		memset(mpitem_bitmap, 0, gnum_of_items);
	}
}


// Load CFP-tree pages into memory. The start position of the pages loaded cannot exceed nstart_pos_boundary.
//
// Pages of gntree_page_size bytes are read from mfpcfp_file into the slots of
// mptree_buffer (slot = page number modulo mnbuf_num_of_pages) until the
// buffered range, which ends at mninbuf_end_pos, extends past ndisk_pos, or
// until the distance from the page-aligned boundary to mninbuf_end_pos reaches
// mnbuf_size.
//
//   nstart_pos_boundary - position the buffer start must not pass; it is
//                         rounded down to a multiple of gntree_page_size
//   ndisk_pos           - position that should end up inside the buffer
//
// Returns 0 when ndisk_pos lies before mninbuf_end_pos on exit, -1 otherwise.
int CTREE_EHTA::LoadPages(int nstart_pos_boundary, int ndisk_pos)
{
	int nfile_pos, ncur_page_no, num_of_entries;
	ENTRY *pentries;
	char* ptgt_stat_array;

	// integer division rounds the boundary down to the start of its page
	nstart_pos_boundary = nstart_pos_boundary / gntree_page_size*gntree_page_size;

	while (mninbuf_end_pos <= ndisk_pos && mninbuf_end_pos - nstart_pos_boundary<mnbuf_size)
	{
		// The buffer is full and its first page lies before the boundary:
		// give up pages at the front until there is room for one more page.
		while (mnmin_inmem_level >= 0 && mninbuf_end_pos - mninbuf_start_pos == mnbuf_size && mninbuf_start_pos<nstart_pos_boundary)
		{
			// the first page ends at or before the disk position of the lowest
			// stack level marked in-memory, so it can simply be dropped
			if (mninbuf_start_pos + gntree_page_size <= mpactive_stack[mnmin_inmem_level].ndisk_pos)
				mninbuf_start_pos += gntree_page_size;
			else
			{
				// Otherwise that stack level's entries and target statistics are
				// copied into freshly allocated arrays and the level is flagged
				// as no longer in memory.
				// NOTE(review): presumably the level's pointers referred into the
				// page that is about to be overwritten - confirm
				num_of_entries = mpactive_stack[mnmin_inmem_level].num_of_entries;
				if (num_of_entries >= 1)
				{
					pentries = NewEntryArray(num_of_entries);
					memcpy(pentries, mpactive_stack[mnmin_inmem_level].pentries, sizeof(ENTRY)*num_of_entries);
					mpactive_stack[mnmin_inmem_level].pentries = pentries;
					ptgt_stat_array = NewTgtStatArray(num_of_entries*gntgt_stat_size);
					memcpy(ptgt_stat_array, mpactive_stack[mnmin_inmem_level].ptgt_stat_array, gntgt_stat_size*num_of_entries);
					mpactive_stack[mnmin_inmem_level].ptgt_stat_array = ptgt_stat_array;
				}
				else
				{
					// a count below 1 is negated and a single entry with a single
					// statistics record is copied; the negated count itself is only
					// used by the commented-out item copy below
					num_of_entries = -num_of_entries;
					pentries = NewEntryArray(1);
					memcpy(pentries, mpactive_stack[mnmin_inmem_level].pentries, sizeof(ENTRY));
					mpactive_stack[mnmin_inmem_level].pentries = pentries;
					//pitems = NewCFPItemset(num_of_entries);
					//memcpy(pitems, pentries->pitems, sizeof(int)*num_of_entries);
					//pentries->pitems = pitems;
					ptgt_stat_array = NewTgtStatArray(gntgt_stat_size);
					memcpy(ptgt_stat_array, mpactive_stack[mnmin_inmem_level].ptgt_stat_array, gntgt_stat_size);
					mpactive_stack[mnmin_inmem_level].ptgt_stat_array = ptgt_stat_array;
				}
				mpactive_stack[mnmin_inmem_level].in_mem = false;
				// move on to the next stack level; -1 means no level is left in memory
				mnmin_inmem_level++;
				if (mnmin_inmem_level >= mnactive_top)
					mnmin_inmem_level = -1;
			}
		}
		// with no stack level left in memory the buffer start jumps to the boundary
		if (mnmin_inmem_level == -1)
		{
			// sanity check; the boundary was page-aligned at the top of the function
			if (nstart_pos_boundary%gntree_page_size != 0)
				printf("Error: the position should be the at the start of a page\n");
			mninbuf_start_pos = nstart_pos_boundary;
		}
		// position the file at the end of the buffered range and read one more page
		nfile_pos = ftell(mfpcfp_file);
		if (nfile_pos != mninbuf_end_pos)
			fseek(mfpcfp_file, mninbuf_end_pos - nfile_pos, SEEK_CUR);
		ncur_page_no = (mninbuf_end_pos / gntree_page_size) % mnbuf_num_of_pages;
		fread(mptree_buffer[ncur_page_no], sizeof(char), gntree_page_size, mfpcfp_file);
		mninbuf_end_pos += gntree_page_size;
	}
	if (mninbuf_start_pos>nstart_pos_boundary)
		printf("Error: the start position should not exceed the boundary\n");

	// -1: ndisk_pos is still not covered by the buffer
	if (mninbuf_end_pos <= ndisk_pos)
		return -1;
	else
		return 0;
}


//------ routines for output rules ------
void OutputOneRule(FILE *fp, ASSOCRULE *prule)
{
	int i;

	fprintf(fp, "%d ", prule->ppat_info->npreorder);

	fprintf(fp, "%d ", prule->npat_len);
	for(i=0;i<prule->npat_len;i++)
		fprintf(fp, "%d ", prule->pattern[i]);

	fprintf(fp, "%d ", prule->ppat_info->nsupport);
	if(gntgt_attr_type==CONTINUOUS || gntgt_attr_type==CONTINUOUS_NORMAL)
		fprintf(fp, "%f %f ", prule->ppat_info->dmean, prule->ppat_info->dstd_dev);
	else if(gsztarget_value[0]!=0)
		fprintf(fp, "%d ", ((int*)prule->ppat_info->ptgt_stat)[0]);
	else if(gbrule_multiclass_pairwise)
	{
		fprintf(fp, "%d ", prule->ntgt_class);
		fprintf(fp, "%d ", ((int*)prule->ppat_info->ptgt_stat)[prule->ntgt_class]);
	}
	else
	{
		for(i=0;i<gnum_of_tgt_values;i++)
			fprintf(fp, "%d ", ((int*)prule->ppat_info->ptgt_stat)[i]);
	}

	fprintf(fp, "%.3f %.4E ", prule->dscore, prule->dpvalue);
	fprintf(fp, "%.4E %.3E", prule->dadjusted_pvalue, prule->dcond_pvalue);

	//fprintf(fp, "\t");

	//fprintf(fp, "%d %d  ", prule->num_of_sign_subsets, prule->num_of_sign_subsets_explained);
	//fprintf(fp, "%d %d  ", prule->num_of_sign_supersets, prule->num_of_sign_supersets_explained);

	//fprintf(fp, "\t");

	//fprintf(fp, "%d %.3E ", prule->nmax_subset_item, prule->dmax_subset_locpvalue);
	//fprintf(fp, "%d %.3E %.3E ", prule->nminsubset_item, prule->dmin_subset_pvalue, prule->dminsubset_locpvalue);

	//fprintf(fp, "%d %.3E ", prule->nmax_superset_item, prule->dmax_superset_locpvalue);
	//fprintf(fp, "%d %.3E %.3E ", prule->nminsuperset_item, prule->dmin_superset_pvalue, prule->dminsuperset_locpvalue);

	fprintf(fp, "\n");
}

// Write one line "<preorder> <support> <target support> <ratio> <p-value>" to fp,
// where ratio = ntgt_sup / nsup; ratio and p-value use the %.3E format.
void OutputOnePvalue(FILE *fp, int npreorder, int nsup, int ntgt_sup, double dpvalue)
{
	const double dtgt_ratio = (double)ntgt_sup/nsup;

	fprintf(fp, "%d %d %d %.3E %.3E\n", npreorder, nsup, ntgt_sup, dtgt_ratio, dpvalue);
}

// Write the run summary to "<szoutput_name>.rule.summary.txt", one value per
// line: maximum p-value, number of tests, permutation FWER p-value threshold,
// number of output rules, total pattern length, target statistics size,
// number of target values and a 0/1 flag for pairwise multi-class rules.
// LoadRuleSummary reads the values back in the same order.
// Nothing is written when the file name does not fit the local buffer or the
// file cannot be opened.
void OutputRuleSummary(char* szoutput_name)
{
	FILE *fp;
	char szsummary_filename[200];
	int nname_len;

	// bounded formatting: an over-long output name must not overflow the buffer
	nname_len = snprintf(szsummary_filename, sizeof(szsummary_filename), "%s.rule.summary.txt", szoutput_name);
	if(nname_len<0 || nname_len>=(int)sizeof(szsummary_filename))
	{
		printf("Error: output name %s is too long\n", szoutput_name);
		return;
	}
	fp = fopen(szsummary_filename, "wt");
	if(fp==NULL)
	{
		printf("Error: cannot open file %s for write\n", szsummary_filename);
		return;
	}

	fprintf(fp, "%f\n", gdmax_pvalue);
	fprintf(fp, "%d\n", gnum_of_tests);
	fprintf(fp, "%.3E\n", gdpermFWER_pvalue_thres);
	fprintf(fp, "%d\n", gnum_of_output_rules);
	fprintf(fp, "%d\n", gntotal_context_len);
	fprintf(fp, "%d\n", gntgt_stat_size);
	fprintf(fp, "%d\n", gnum_of_tgt_values);
	if(gbrule_multiclass_pairwise)
		fprintf(fp, "1\n");
	else
		fprintf(fp, "0\n");

	fclose(fp);
}
//------


//====== routines for load rules ======
// Read one rule in the text format written by OutputOneRule.
// prule->pattern and prule->ppat_info (including its ptgt_stat buffer) must
// already point to storage supplied by the caller.
// When no further rule can be read - end of file, a read error, or a token
// that is not a number - ppat_info->npreorder is set to -10 and nothing else
// is touched; callers test for that sentinel.
// NOTE(review): npat_len and ntgt_class come from the file and are used as
// array sizes/indices without a bounds check; the capacity of the buffers is
// not known here.
void LoadOneRule(FILE *fp, ASSOCRULE *prule)
{
	int i;

	// Checking the conversion count (not only feof) guarantees the sentinel is
	// set on malformed input too, so a caller looping on npreorder terminates.
	if(fscanf(fp, "%d ", &prule->ppat_info->npreorder)!=1 || feof(fp))
		prule->ppat_info->npreorder = -10;
	else
	{
		fscanf(fp, "%d", &prule->npat_len);
		for(i=0;i<prule->npat_len;i++)
			fscanf(fp, "%d", &prule->pattern[i]);
		fscanf(fp, "%d", &prule->ppat_info->nsupport);
		if(gntgt_attr_type==CONTINUOUS || gntgt_attr_type==CONTINUOUS_NORMAL)
		{
			// the file stores mean and standard deviation; rebuild the sums
			// kept in TGT_SUM from them
			fscanf(fp, "%lf %lf", &prule->ppat_info->dmean, &prule->ppat_info->dstd_dev);
			((TGT_SUM*)prule->ppat_info->ptgt_stat)->dsum = prule->ppat_info->dmean*prule->ppat_info->nsupport;
			((TGT_SUM*)prule->ppat_info->ptgt_stat)->dsquare_sum = prule->ppat_info->dstd_dev*prule->ppat_info->dstd_dev*(prule->ppat_info->nsupport-1)+prule->ppat_info->nsupport*prule->ppat_info->dmean*prule->ppat_info->dmean;
		}
		else if(gsztarget_value[0]!=0)
		{
			// single target value: one count, mean = count / support
			fscanf(fp, "%d", (int*)prule->ppat_info->ptgt_stat);
			prule->ppat_info->dmean = (double)((int*)prule->ppat_info->ptgt_stat)[0]/prule->ppat_info->nsupport;
		}
		else if(gbrule_multiclass_pairwise)
		{
			// pairwise multi-class: the class, then the count stored at that class index
			fscanf(fp, "%d", &prule->ntgt_class);
			fscanf(fp, "%d", &(((int*)prule->ppat_info->ptgt_stat)[prule->ntgt_class]));
		}
		else 
		{
			// one count per target value
			for(i=0;i<gnum_of_tgt_values;i++)
				fscanf(fp, "%d", &(((int*)prule->ppat_info->ptgt_stat)[i]));
		}

		fscanf(fp, "%lf %lf", &prule->dscore, &prule->dpvalue);
		fscanf(fp, "%lf %lf", &prule->dadjusted_pvalue, &prule->dcond_pvalue);
	}
}

// Read "<szoutput_name>.rule.summary.txt" (written by OutputRuleSummary) back
// into the corresponding globals and derive the Bonferroni threshold
// gdBC_pvalue_thres = gdmax_pvalue / gnum_of_tests.
// The globals are left untouched when the file name does not fit the local
// buffer or the file cannot be opened. An incomplete file is reported; the
// values that could be read are still stored.
void LoadRuleSummary(char* szoutput_name)
{
	FILE *fp;
	char szsummary_filename[200];
	int nflag, nname_len, nfields;

	// bounded formatting: an over-long output name must not overflow the buffer
	nname_len = snprintf(szsummary_filename, sizeof(szsummary_filename), "%s.rule.summary.txt", szoutput_name);
	if(nname_len<0 || nname_len>=(int)sizeof(szsummary_filename))
	{
		printf("Error: output name %s is too long\n", szoutput_name);
		return;
	}
	fp = fopen(szsummary_filename, "rt");
	if(fp==NULL)
	{
		printf("Error: cannot open file %s for read\n", szsummary_filename);
		return;
	}

	// default used when the flag line is missing; previously the flag was
	// read uninitialised in that case
	nflag = 0;
	nfields = 0;
	nfields += (fscanf(fp, "%lf", &gdmax_pvalue)==1);
	nfields += (fscanf(fp, "%d", &gnum_of_tests)==1);
	nfields += (fscanf(fp, "%lf", &gdpermFWER_pvalue_thres)==1);
	nfields += (fscanf(fp, "%d", &gnum_of_output_rules)==1);
	nfields += (fscanf(fp, "%d", &gntotal_context_len)==1);
	nfields += (fscanf(fp, "%d", &gntgt_stat_size)==1);
	nfields += (fscanf(fp, "%d", &gnum_of_tgt_values)==1);
	nfields += (fscanf(fp, "%d", &nflag)==1);
	if(nfields!=8)
		printf("Error: rule summary file %s is incomplete\n", szsummary_filename);

	if(nflag)
		gbrule_multiclass_pairwise = true;
	else
		gbrule_multiclass_pairwise = false;	

	gdBC_pvalue_thres = gdmax_pvalue/gnum_of_tests;

	fclose(fp);
}
//======


//------
void OutputOneRuleRaw(FILE *fp, ASSOCRULE *prule)
{
	int i;

	fprintf(fp, "%d ", prule->ppat_info->npreorder);

	fprintf(fp, "%d ", prule->npat_len);
	for(i=0;i<prule->npat_len;i++)
		fprintf(fp, "%s  ", gpAttrValues[prule->pattern[i]].szattr_value);
	fprintf(fp, "%d ", prule->ppat_info->nsupport);

	if(gntgt_attr_type==CONTINUOUS || gntgt_attr_type==CONTINUOUS_NORMAL)
		fprintf(fp, "%f %f ", prule->ppat_info->dmean, prule->ppat_info->dstd_dev);
	else if(gsztarget_value[0]!=0)
		fprintf(fp, "%d ", ((int*)prule->ppat_info->ptgt_stat)[0]);
	else if(gbrule_multiclass_pairwise)
	{
		fprintf(fp, "%d ", prule->ntgt_class);
		fprintf(fp, "%d ", ((int*)prule->ppat_info->ptgt_stat)[prule->ntgt_class]);
	}
	else
	{
		for(i=0;i<gnum_of_tgt_values;i++)
			fprintf(fp, "%d ", ((int*)prule->ppat_info->ptgt_stat)[i]);
	}

	fprintf(fp, "%.3f %.4E ", prule->dscore, prule->dpvalue);
	fprintf(fp, "%.4E %.3E", prule->dadjusted_pvalue, prule->dcond_pvalue);

	fprintf(fp, "\n");
}

// Convert a rule file with numeric item ids (szrule_filename) into a file with
// attribute-value names (szoutput_filename): every rule is read with
// LoadOneRule and written with OutputOneRuleRaw until LoadOneRule reports the
// end of the input (npreorder below -1).
// szattrvalue_filename is not used by this function.
// The scratch pattern holds gnum_of_attrs items.
void ConvertRules(char* szrule_filename, char* szattrvalue_filename, char* szoutput_filename)
{
	FILE *fp, *fpout;
	ASSOCRULE onerule;

	fp = fopen(szrule_filename, "rt");
	if(fp==NULL)
	{
		printf("Error: cannot open file %s for read\n", szrule_filename);
		return;
	}
	fpout = fopen(szoutput_filename, "wt");
	if(fpout==NULL)
	{
		printf("Error: cannot open file %s for write\n", szoutput_filename);
		// do not leak the input file that is already open
		fclose(fp);
		return;
	}

	// scratch rule reused for every line of the input
	onerule.pattern = new int[gnum_of_attrs];
	onerule.ppat_info = new PAT_INFO;
	onerule.ppat_info->ptgt_stat = new char[gntgt_stat_size];

	LoadOneRule(fp, &onerule);
	while(onerule.ppat_info->npreorder>=-1)
	{
		OutputOneRuleRaw(fpout, &onerule);

		LoadOneRule(fp, &onerule);
	}

	delete []onerule.ppat_info->ptgt_stat;
	delete onerule.ppat_info;
	delete []onerule.pattern;

	fclose(fp);
	fclose(fpout);
}
//------


//------ routines for sorting rules and for finding cut-off p-value threshold
void LoadRules(char* szrule_filename, int num_of_rules, ASSOCRULE *prules, int *ppat_buf)
{
	FILE *fp;
	int npat_buf_pos, i;

	fp = fopen(szrule_filename, "rt");
	if(fp==NULL)
	{
		printf("Error: cannot open file %s for read\n", szrule_filename);
		return;
	}

	npat_buf_pos = 0;
	for(i=0;i<num_of_rules;i++)
	{
		prules[i].pattern = &ppat_buf[npat_buf_pos];
		LoadOneRule(fp, &prules[i]);

		if(prules[i].ppat_info->npreorder==-10)
		{
			printf("Error: the rule file should not end\n");
			break;
		}

		npat_buf_pos += prules[i].npat_len;
	}
	fclose(fp);
}

int comp_rule(const void *e1, const void *e2)
{
	ASSOCRULE *p1, *p2;

	p1 = (ASSOCRULE*)e1;
	p2 = (ASSOCRULE*)e2;

	if(p1->dpvalue < p2->dpvalue)
		return -1;
	else if(p1->dpvalue > p2->dpvalue)
		return 1;
	else if(p1->dscore > p2->dscore)
		return -1;
	else if(p1->dscore < p2->dscore)
		return 1;
	else if(p1->dadjusted_pvalue < p2->dadjusted_pvalue)
		return -1;
	else if(p1->dadjusted_pvalue > p2->dadjusted_pvalue)
		return 1;
	else if(p1->npat_len < p2->npat_len)
		return -1;
	else if(p1->npat_len > p2->npat_len)
		return 1;
	else if(p1->ppat_info->npreorder < p2->ppat_info->npreorder)
		return -1;
	else if(p1->ppat_info->npreorder > p2->ppat_info->npreorder)
		return 1;
	else
		return 0;
}

// Sort the rules of "<szoutput_name>.rules.txt" with comp_rule, write them to
// "<szoutput_name>.sorted.rules.txt" (items of each pattern in ascending
// order), count the significant rules into gosign_rule_nums and write those
// counts through OutputSignRuleNums with the prefix "<szoutput_name>.sorted".
// The elapsed time and peak memory are stored in gdsort_rule_time and
// gdsort_rule_max_mem_size.
// All file names are built up front; if one does not fit its buffer the
// function reports the problem and returns before loading anything.
void SortRules(char* szoutput_name)
{
	FILE *fp;
	char szrule_filename[200], szsorted_filename[200], szrulenum_filename[200];
	ASSOCRULE *prules;
	PAT_INFO *ppat_info_buf;
	int *ppat_buf, i, nname_len;
	char* ptgt_stat_buf;
	bool bnames_ok;
	
	struct timeb start, end;
	
	ftime(&start);

	gdused_mem_size = 0;
	gdmax_used_mem_size = 0;

	// bounded formatting: an over-long output name must not overflow the buffers
	nname_len = snprintf(szrule_filename, sizeof(szrule_filename), "%s.rules.txt", szoutput_name);
	bnames_ok = (nname_len>=0 && nname_len<(int)sizeof(szrule_filename));
	nname_len = snprintf(szsorted_filename, sizeof(szsorted_filename), "%s.sorted.rules.txt", szoutput_name);
	bnames_ok = bnames_ok && (nname_len>=0 && nname_len<(int)sizeof(szsorted_filename));
	nname_len = snprintf(szrulenum_filename, sizeof(szrulenum_filename), "%s.sorted", szoutput_name);
	bnames_ok = bnames_ok && (nname_len>=0 && nname_len<(int)sizeof(szrulenum_filename));
	if(!bnames_ok)
	{
		printf("Error: output name %s is too long\n", szoutput_name);
		return;
	}

	LoadRuleSummary(szoutput_name);

	//load rules
	prules = new ASSOCRULE[gnum_of_output_rules];
	IncMemSize(sizeof(ASSOCRULE)*gnum_of_output_rules);
	ppat_info_buf= new PAT_INFO[gnum_of_output_rules];
	IncMemSize(sizeof(PAT_INFO)*gnum_of_output_rules);
	ptgt_stat_buf = NewCharArray(gnum_of_output_rules*gntgt_stat_size);
	// every rule gets its own PAT_INFO and its own slice of the statistics buffer
	for(i=0;i<gnum_of_output_rules;i++)
	{
		prules[i].ppat_info = &ppat_info_buf[i];
		prules[i].ppat_info->ptgt_stat = &ptgt_stat_buf[i*gntgt_stat_size];
	}
	ppat_buf = NewIntArray(gntotal_context_len);

	LoadRules(szrule_filename, gnum_of_output_rules, prules, ppat_buf);

	qsort(prules, gnum_of_output_rules, sizeof(ASSOCRULE), comp_rule);

	//output sorted rules
	fp= fopen(szsorted_filename, "wt");
	if(fp==NULL)
	{
		printf("Error: cannot open file %s for write\n", szsorted_filename);
	}
	else
	{
		for(i=0;i<gnum_of_output_rules;i++)
		{
			qsort(prules[i].pattern, prules[i].npat_len, sizeof(int), comp_int_asc);
			OutputOneRule(fp, &prules[i]);
		}
		fclose(fp);
	}

	GetRuleNumNThres(prules, gnum_of_output_rules, &gosign_rule_nums);

	OutputSignRuleNums(szrulenum_filename, &gosign_rule_nums);

	delete []prules;
	DecMemSize(sizeof(ASSOCRULE)*gnum_of_output_rules);
	delete []ppat_info_buf;
	DecMemSize(sizeof(PAT_INFO)*gnum_of_output_rules);
	DelIntArray(ppat_buf, gntotal_context_len);
	DelCharArray(ptgt_stat_buf, gnum_of_output_rules*gntgt_stat_size);

	ftime(&end);
	gdsort_rule_time = end.time-start.time+(double)(end.millitm-start.millitm)/1000;
	gdsort_rule_max_mem_size = gdmax_used_mem_size;
}

// Count how many of the rules are significant under several criteria and
// store the counts and thresholds in prule_nums; a summary is printed.
// Every loop stops at the first rule that fails its test, so the counts are
// the lengths of the leading runs of passing rules.
// NOTE(review): this presumes prules is already ordered (SortRules sorts with
// comp_rule before calling) - confirm for other callers.
// The permutation-based fields are only computed when gncorrection_method is
// not -1; they are zeroed first so they never hold stale values when written
// out later.
void GetRuleNumNThres(ASSOCRULE *prules, int num_of_rules, SIGN_RULE_NUM *prule_nums)
{
	int i;

	// defaults for the fields that are skipped when no permutation correction is used
	prule_nums->num_of_permFWER_rules = 0;
	prule_nums->num_of_perm_sign_rules = 0;
	prule_nums->num_of_perm_BCsign_rules = 0;
	prule_nums->num_of_perm_BHsign_rules = 0;
	prule_nums->dpermBH_pvalue_thres = 0;

	// Bonferroni correction
	gdBC_pvalue_thres = gdmax_pvalue/gnum_of_tests;
	prule_nums->num_of_sign_rules = 0;
	prule_nums->num_of_BCsign_rules = 0;
	for(i=0;i<num_of_rules;i++)
	{
		if(prules[i].dpvalue<=gdmax_pvalue)
		{
			prule_nums->num_of_sign_rules++;
			if(prules[i].dpvalue<=gdBC_pvalue_thres)
				prule_nums->num_of_BCsign_rules++;
		}
		else
			break;
	}

	//Benjamini and Hochberg's method: rule i (1-based rank i+1) passes while
	//its p-value is at most rank/#tests * max p-value
	prule_nums->num_of_BHsign_rules = 0;
	for(i=0;i<num_of_rules;i++)
	{
		if(prules[i].dpvalue<=(double)(i+1)/gnum_of_tests*gdmax_pvalue)
			prule_nums->num_of_BHsign_rules++;
		else 
			break;
	}
	// threshold = p-value of the last rule that passed
	if(i>0)
		prule_nums->dBH_pvalue_thres = prules[i-1].dpvalue;
	else 
		prule_nums->dBH_pvalue_thres = 0;


	printf("#significant rules at %.2E level: %d\n", gdmax_pvalue, prule_nums->num_of_sign_rules);
	printf("#significant rules at %.2E level: %d\n", gdBC_pvalue_thres, prule_nums->num_of_BCsign_rules);
	printf("#significant rules at FDR %.2E: %d (%.3E)\n", gdmax_pvalue, prule_nums->num_of_BHsign_rules, prule_nums->dBH_pvalue_thres);
	printf("\n");

	if(gncorrection_method!=-1)
	{
		//control family wise error rate using the permutation-based threshold
		for(i=0;i<num_of_rules;i++)
		{
			if(prules[i].dpvalue<=gdpermFWER_pvalue_thres)
				prule_nums->num_of_permFWER_rules++;
			else 
				break;
		}

		// same tests as above on the permutation-adjusted p-values
		for(i=0;i<num_of_rules;i++)
		{
			if(prules[i].dadjusted_pvalue<=gdmax_pvalue)
			{
				prule_nums->num_of_perm_sign_rules++;
				if(prules[i].dadjusted_pvalue<=gdBC_pvalue_thres)
					prule_nums->num_of_perm_BCsign_rules++;
			}
			else 
				break;
		}


		// rank-based threshold applied to the adjusted p-values
		// (labelled "Westfall & Young's method" by the original author)
		for(i=0;i<num_of_rules;i++)
		{
			if(prules[i].dadjusted_pvalue<=(double)(i+1)/gnum_of_tests*gdmax_pvalue)
				prule_nums->num_of_perm_BHsign_rules++;
			else 
				break;
		}
		if(i>0)
			prule_nums->dpermBH_pvalue_thres = prules[i-1].dadjusted_pvalue;
		else
			prule_nums->dpermBH_pvalue_thres = 0;


		printf("p-values adjusted by permutation tests:\n");
		printf("#significant rules at %.2E level: %d\n", gdmax_pvalue, prule_nums->num_of_perm_sign_rules);
		printf("#significant rules at %.2E level (FWER): %d\n", gdpermFWER_pvalue_thres, prule_nums->num_of_permFWER_rules);
		printf("#significant rules at FDR %.2E: %d (%.3E)\n", gdmax_pvalue, prule_nums->num_of_perm_BHsign_rules, prule_nums->dpermBH_pvalue_thres);
		printf("\n");
	}
}

// Write the thresholds and significant-rule counts of psign_rule_nums to
// "<szoutput_name>.rulenum.txt", one value per line: number of tests, maximum
// p-value, BH threshold, permutation FWER threshold, permutation BH threshold,
// followed by the seven rule counts.
// Nothing is written when the file name does not fit the local buffer or the
// file cannot be opened.
void OutputSignRuleNums(char* szoutput_name, SIGN_RULE_NUM *psign_rule_nums)
{
	FILE *fp;
	char szoutput_filename[200];
	int nname_len;

	// bounded formatting: an over-long output name must not overflow the buffer
	nname_len = snprintf(szoutput_filename, sizeof(szoutput_filename), "%s.rulenum.txt", szoutput_name);
	if(nname_len<0 || nname_len>=(int)sizeof(szoutput_filename))
	{
		printf("Error: output name %s is too long\n", szoutput_name);
		return;
	}
	fp = fopen(szoutput_filename, "wt");
	if(fp==NULL)
	{
		printf("Error: cannot open file %s for write\n", szoutput_filename);
		return;
	}

	// thresholds
	fprintf(fp, "%d\n", gnum_of_tests);
	fprintf(fp, "%.3E\n", gdmax_pvalue);
	fprintf(fp, "%.3E\n", psign_rule_nums->dBH_pvalue_thres);
	fprintf(fp, "%.3E\n", gdpermFWER_pvalue_thres);
	fprintf(fp, "%.3E\n", psign_rule_nums->dpermBH_pvalue_thres);

	// counts
	fprintf(fp, "%d\n", psign_rule_nums->num_of_sign_rules);
	fprintf(fp, "%d\n", psign_rule_nums->num_of_BCsign_rules);
	fprintf(fp, "%d\n", psign_rule_nums->num_of_BHsign_rules);
	fprintf(fp, "%d\n", psign_rule_nums->num_of_perm_sign_rules);
	fprintf(fp, "%d\n", psign_rule_nums->num_of_perm_BCsign_rules);
	fprintf(fp, "%d\n", psign_rule_nums->num_of_perm_BHsign_rules);
	fprintf(fp, "%d\n", psign_rule_nums->num_of_permFWER_rules);

	fclose(fp);
}

int comp_rule_adj(const void *e1, const void *e2)
{
	ASSOCRULE *p1, *p2;

	p1 = (ASSOCRULE*)e1;
	p2 = (ASSOCRULE*)e2;

	if(p1->dadjusted_pvalue < p2->dadjusted_pvalue)
		return -1;
	else if(p1->dadjusted_pvalue > p2->dadjusted_pvalue)
		return 1;
	else if(p1->dpvalue < p2->dpvalue)
		return -1;
	else if(p1->dpvalue > p2->dpvalue)
		return 1;
	else if(p1->dscore > p2->dscore)
		return -1;
	else if(p1->dscore < p2->dscore)
		return 1;
	else if(p1->npat_len < p2->npat_len)
		return -1;
	else if(p1->npat_len > p2->npat_len)
		return 1;
	else
		return 0;
}

// Variant of SortRules that adjusts each p-value by the number of tests made
// at the rule's support level or above:
//   dadjusted_pvalue = dpvalue * gpsup_testnums[nsupport]
// The rules are then sorted with comp_rule_adj, written to
// "<szoutput_name>.supadj.rules.txt", and the significant-rule counts are
// written through OutputSignRuleNums with the prefix "<szoutput_name>.supadj".
// gpsup_testnums is turned into suffix sums in place.
// NOTE(review): that makes a second call on the same array accumulate twice -
// confirm the function is only called once per run.
// All file names are built up front; if one does not fit its buffer the
// function reports the problem and returns before loading anything.
void SortRulesSupAdjusted(char* szoutput_name)
{
	FILE *fp;
	char szrule_filename[200], szsupadj_filename[200], szrulenum_filename[200];
	ASSOCRULE *prules;
	PAT_INFO *ppat_info_buf;
	int *ppat_buf, i, nname_len;
	char* ptgt_stat_buf;
	SIGN_RULE_NUM thesign_rule_nums;
	bool bnames_ok;
	
	struct timeb start, end;
	
	ftime(&start);

	gdused_mem_size = 0;
	gdmax_used_mem_size = 0;

	// bounded formatting: an over-long output name must not overflow the buffers
	nname_len = snprintf(szrule_filename, sizeof(szrule_filename), "%s.rules.txt", szoutput_name);
	bnames_ok = (nname_len>=0 && nname_len<(int)sizeof(szrule_filename));
	nname_len = snprintf(szsupadj_filename, sizeof(szsupadj_filename), "%s.supadj.rules.txt", szoutput_name);
	bnames_ok = bnames_ok && (nname_len>=0 && nname_len<(int)sizeof(szsupadj_filename));
	nname_len = snprintf(szrulenum_filename, sizeof(szrulenum_filename), "%s.supadj", szoutput_name);
	bnames_ok = bnames_ok && (nname_len>=0 && nname_len<(int)sizeof(szrulenum_filename));
	if(!bnames_ok)
	{
		printf("Error: output name %s is too long\n", szoutput_name);
		return;
	}

	LoadRuleSummary(szoutput_name);

	//load rules
	prules = new ASSOCRULE[gnum_of_output_rules];
	IncMemSize(sizeof(ASSOCRULE)*gnum_of_output_rules);
	ppat_info_buf= new PAT_INFO[gnum_of_output_rules];
	IncMemSize(sizeof(PAT_INFO)*gnum_of_output_rules);
	ptgt_stat_buf = NewCharArray(gnum_of_output_rules*gntgt_stat_size);
	// every rule gets its own PAT_INFO and its own slice of the statistics buffer
	for(i=0;i<gnum_of_output_rules;i++)
	{
		prules[i].ppat_info = &ppat_info_buf[i];
		prules[i].ppat_info->ptgt_stat = &ptgt_stat_buf[i*gntgt_stat_size];
	}
	ppat_buf = NewIntArray(gntotal_context_len);

	LoadRules(szrule_filename, gnum_of_output_rules, prules, ppat_buf);

	//get number of tests at each support level
	//(suffix sum: entry i becomes the number of tests with support >= i)
	for(i=gndb_size-2;i>=gnmin_sup;i--)
		gpsup_testnums[i] += gpsup_testnums[i+1];
	if(gpsup_testnums[gnmin_sup]!=gnum_of_tests)
		printf("Error: inconsistent number of tests\n");

	for(i=0;i<gnum_of_output_rules;i++)
		prules[i].dadjusted_pvalue = prules[i].dpvalue*gpsup_testnums[prules[i].ppat_info->nsupport];

	qsort(prules, gnum_of_output_rules, sizeof(ASSOCRULE), comp_rule_adj);

	//output sorted rules
	fp= fopen(szsupadj_filename, "wt");
	if(fp==NULL)
	{
		printf("Error: cannot open file %s for write\n", szsupadj_filename);
	}
	else
	{
		for(i=0;i<gnum_of_output_rules;i++)
		{
			qsort(prules[i].pattern, prules[i].npat_len, sizeof(int), comp_int_asc);
			OutputOneRule(fp, &prules[i]);
		}
		fclose(fp);
	}

	GetRuleNumSupAdj(prules, gnum_of_output_rules, &thesign_rule_nums);

	OutputSignRuleNums(szrulenum_filename, &thesign_rule_nums);

	delete []prules;
	DecMemSize(sizeof(ASSOCRULE)*gnum_of_output_rules);
	delete []ppat_info_buf;
	DecMemSize(sizeof(PAT_INFO)*gnum_of_output_rules);
	DelIntArray(ppat_buf, gntotal_context_len);
	DelCharArray(ptgt_stat_buf, gnum_of_output_rules*gntgt_stat_size);

	ftime(&end);
	gdsort_rule_time = end.time-start.time+(double)(end.millitm-start.millitm)/1000;
	gdsort_rule_max_mem_size = gdmax_used_mem_size;
}


// Count the leading rules whose support-adjusted p-value does not exceed
// gdmax_pvalue and store the count in prule_nums->num_of_sign_rules; counting
// stops at the first rule that fails. All other fields of prule_nums are set
// to zero. The count is also printed.
void GetRuleNumSupAdj(ASSOCRULE *prules, int num_of_rules, SIGN_RULE_NUM *prule_nums)
{
	int nleading;

	// fields that have no meaning for the support-adjusted ranking
	prule_nums->num_of_BCsign_rules = 0;
	prule_nums->num_of_BHsign_rules = 0;
	prule_nums->num_of_perm_sign_rules = 0;
	prule_nums->num_of_perm_BCsign_rules = 0;
	prule_nums->num_of_perm_BHsign_rules = 0;
	prule_nums->num_of_permFWER_rules = 0;
	prule_nums->dBH_pvalue_thres = 0;
	prule_nums->dpermBH_pvalue_thres = 0;

	// length of the leading run of significant rules
	nleading = 0;
	while(nleading<num_of_rules && prules[nleading].dadjusted_pvalue<=gdmax_pvalue)
		nleading++;
	prule_nums->num_of_sign_rules = nleading;

	printf("#significant rules at %.2E level adjusted by support levels: %d\n", gdmax_pvalue, prule_nums->num_of_sign_rules);
	printf("\n");

}
//------


