#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <time.h>
#include <sys/timeb.h>

#include "FPtree.h"
#include "cfptree_outbuf.h"


// ---- File-scope state shared by the mining routines -----------------------

// Paged buffer of FP-tree nodes; MinePats creates its first page before the
// initial tree is built and walks/frees the page list afterwards.
FPNODE_BUF gofpnode_buf;
// Header table of the initial (top-level) FP-tree; set in MinePats.
HEADER_TABLE gpheader_table;

// Header-node array reserved "for dfs mining": MinePats allocates
// gndfs_header_size (= gnum_of_items*5) entries and resets gndfs_header_pos to 0.
HEADER_TABLE gpdfs_header_array;
int gndfs_header_size;
int gndfs_header_pos;

// Byte buffer of gntgt_stat_size*gndfs_header_size bytes allocated next to
// gpdfs_header_array.
char* gpdfs_tgtstat_array;
// Set by ScanDBCountFreqItems according to the target type: sizeof(TGT_SUM),
// sizeof(int)*gnum_of_tgt_values, or 0 when a single target value is given.
int gnfptree_tgt_stat_size;

// Current prefix itemset and its length. MinePats seeds it with the items whose
// support equals the database size.
int *gpprefix_itemset;
int gnprefix_len;
// Reset to 0 by MinePats; presumably the current recursion depth (usage not
// visible in this part of the file).
int gndepth;
// Both reset to 0 by MinePats; their use is outside this part of the file.
int gnnon_context_comp_item_num;
int gnother_item_num;
// NOTE(review): not referenced in the visible part of the file.
int gntree_level;
// int scratch array of gnum_of_attrs entries allocated by MinePats; presumably
// holds a single-branch tree path during mining -- confirm against its users.
int *gpsingle_branch;
// Value of gnprefix_len right after the first-level item scan in MinePats.
int gnfirst_level_len;

// Scratch buffer (gnum_of_attrs entries) receiving the header-table positions
// of the frequent items of the transaction currently being inserted.
int *gptransaction;
// Maps an item id to its position in the top-level header table; -1 for items
// that are not in the table.
int *gpdfs_item_order_map;

// "attribute=value" string -> item id. Created in ScanDBCountFreqItems and
// deleted by ScanDBBuildFPtree (or by MinePats when no tree is built).
map<string, int> *gpattrvalue_item_map;
// Target attribute value string -> index into the per-value support arrays;
// filled by SortNOutputTgtValueMap. Same lifetime as the map above.
map<string, int> *gptgt_attrvalue_map;

// Output buffer manager that receives the common-prefix and internal nodes
// produced by MinePats.
CTreeOutBufManager gotree_bufmanager;

// Top-level mining driver.
//
// Steps, in order:
//   1. reset the global statistics counters and start the timer;
//   2. scan the database once to count item supports (ScanDBCountFreqItems);
//   3. derive the absolute minimum support when gdmin_sup is a fraction (<1);
//   4. split the items into a common prefix (support == database size) and the
//      entries of the top-level header table;
//   5. if at least one comparing item is frequent, build the initial FP-tree
//      with a second scan and run DepthFGGrowth on it;
//   6. release every structure allocated here or by the two scans and report
//      timing and pattern counts.
//
// Memory accounting: allocations are tracked through IncMemSize/DecMemSize and
// the function reports an error if gdused_mem_size is not 0 at the end. The
// per-item target statistics (gpAttrValues[i].ptgt_sum / ptgt_sups) are only
// allocated inside ScanDBBuildFPtree, so they are only released (and only
// subtracted from the accounting) when that scan actually ran.
void FPtree::MinePats()
{
	FP_NODE *proot;
	HEADER_TABLE pheader_table;
	int i, num_of_items, num_of_freq_comparing_items;
	char* ptgt_stat_array;
	bool bitem_stats_allocated;

	struct timeb start, end;

	printf("Finding large sub-populations...\n");

	ftime(&start);

	gnmax_pattern_len = 0;
	gnmax_sup = 0;
	gntotal_call = 0;
	gnborder_check_times = 0;
	gdused_mem_size = 0;
	gdmax_used_mem_size = 0;
	gntree_init_size = 0;
	gntree_max_size = 0;
	gnfreq_pat_num = 0;
	gnnonclosed_freq_pat_num = 0;
	gninfreq_pat_num = 0;
	gnum_of_closed_pats = 0;

	bitem_stats_allocated = false;

	//count frequent items in original database
	ScanDBCountFreqItems(gsznames_filename, gszdata_filename);

	//a fractional threshold is converted to an absolute count, rounded up
	if(gdmin_sup<1)
	{
		gnmin_sup = (int)(gndb_size*gdmin_sup);
		if(gnmin_sup<gndb_size*gdmin_sup)
			gnmin_sup++;
	}
	printf("minimum support: %d\n", gnmin_sup);

	OutputItemSup(gszoutput_filename);

	gpprefix_itemset = NewIntArray(gnum_of_attrs);
	gnprefix_len = 0;
	gnnon_context_comp_item_num = 0;
	gnother_item_num = 0;

	gndepth = 0;
	gndfs_header_pos = 0;
	gndfs_header_size = 0;
	pheader_table = NewHeaderTable(gnum_of_items, ptgt_stat_array);

	//enumerate frequent itemsets
	num_of_items = 0;
	num_of_freq_comparing_items = 0;
	for(i=0;i<gnum_of_items;i++)
	{
		if(gpAttrValues[i].nsup>0 && (gnmaterialization_mode==FREQ_N_INFREQ_BORDER || gpAttrValues[i].bis_context_item || gpAttrValues[i].bis_comparing_item))
		{
			//an item present in every transaction belongs to the common prefix
			if(gpAttrValues[i].nsup==gndb_size)
				gpprefix_itemset[gnprefix_len++] = i;
			else if(gnmaterialization_mode==FREQ_N_INFREQ_BORDER || gpAttrValues[i].nsup>=gnmin_sup)
			{
				pheader_table[num_of_items].nitem = i;
				pheader_table[num_of_items].nsupport = gpAttrValues[i].nsup;
				pheader_table[num_of_items].pconddb = NULL;
				num_of_items++;
				if(gpAttrValues[i].bis_comparing_item && gpAttrValues[i].nsup>=gnmin_sup)
					num_of_freq_comparing_items++;
			}
		}
	}
	gnfirst_level_len = gnprefix_len;
	gnum_of_freq_items = num_of_items;

	gotree_bufmanager.Init();
	if(gnprefix_len>0)
	{
		//the statistics passed along depend on the kind of target attribute
		if(gntgt_attr_type==CONTINUOUS || gntgt_attr_type==CONTINUOUS_NORMAL)
			gotree_bufmanager.InsertCommonPrefixNode(gpprefix_itemset, gnprefix_len, gndb_size, (char*)gtgt_stat.ptgt_sum);
		else if(gsztarget_value[0]==0)
			gotree_bufmanager.InsertCommonPrefixNode(gpprefix_itemset, gnprefix_len, gndb_size, (char*)gtgt_stat.ptgt_sups);
		else
			gotree_bufmanager.InsertCommonPrefixNode(gpprefix_itemset, gnprefix_len, gndb_size, (char*)&gtgt_stat.ntgt_sup);
	}

	if(num_of_freq_comparing_items>0 && num_of_items>0)
	{
		gpheader_table = pheader_table;

		//item id -> position in the header table, -1 for items not in it
		gpdfs_item_order_map = NewIntArray(gnum_of_items, -1);
		for(i=0;i<num_of_items;i++)
			gpdfs_item_order_map[pheader_table[i].nitem] = i;

		gptransaction = NewIntArray(gnum_of_attrs);
		gpsingle_branch = NewIntArray(gnum_of_attrs);

		//for dfs mining
		gndfs_header_size = gnum_of_items*5;
		gpdfs_header_array = new HEADER_NODE[gndfs_header_size];
		IncMemSize(sizeof(HEADER_NODE)*gndfs_header_size);
		gpdfs_tgtstat_array = NewCharArray(gntgt_stat_size*gndfs_header_size);
		gndfs_header_pos = 0;

		// for allocating space for fptree
		gofpnode_buf.phead = NewFPNodePage();
		gofpnode_buf.pcur_page = gofpnode_buf.phead;
		gofpnode_buf.ncur_pos = 0;
		gofpnode_buf.ntotal_pages = 1;

		proot = ScanDBBuildFPtree(gszdata_filename, pheader_table, gpdfs_item_order_map);
		//ScanDBBuildFPtree allocates the per-item target statistics.
		//NOTE(review): it returns before allocating them if the data file cannot
		//be reopened; that failure is not distinguishable from an empty tree here.
		bitem_stats_allocated = true;
		gntree_init_size = sizeof(FPNODE_BUF)+gofpnode_buf.ntotal_pages*(sizeof(FP_NODE)*FPNODE_PAGE_SIZE+sizeof(FPNODE_PAGE));
		ftime(&end);
		printf("Building initial FP-tree: %.3f\n", end.time-start.time+(double)(end.millitm-start.millitm)/1000);

		//copy the per-item target statistics next to the header table entries
		if(gntgt_attr_type==CONTINUOUS || gntgt_attr_type==CONTINUOUS_NORMAL)
		{
			for(i=0;i<num_of_items;i++)
				memcpy(&ptgt_stat_array[i*gntgt_stat_size], gpAttrValues[pheader_table[i].nitem].ptgt_sum, sizeof(TGT_SUM));
		}
		else if(gsztarget_value[0]==0)
		{
			for(i=0;i<num_of_items;i++)
				memcpy(&ptgt_stat_array[i*gntgt_stat_size], gpAttrValues[pheader_table[i].nitem].ptgt_sups, sizeof(int)*gnum_of_tgt_values);
		}
		else
		{
			for(i=0;i<num_of_items;i++)
				memcpy(&ptgt_stat_array[i*gntgt_stat_size], &gpAttrValues[pheader_table[i].nitem].ntgt_sup, sizeof(int));
		}

		gotree_bufmanager.InsertInternalNode(pheader_table, ptgt_stat_array, num_of_items);

		DepthFGGrowth(proot, pheader_table, ptgt_stat_array, num_of_items);

		gotree_bufmanager.WriteInternalNode();

		gntree_max_size = sizeof(FPNODE_BUF)+gofpnode_buf.ntotal_pages*(sizeof(FP_NODE)*FPNODE_PAGE_SIZE+sizeof(FPNODE_PAGE));

		DelIntArray(gptransaction, gnum_of_attrs);
		DelIntArray(gpsingle_branch, gnum_of_attrs);

		//for DFS mining
		DecMemSize(sizeof(HEADER_NODE)*gndfs_header_size);
		delete []gpdfs_header_array;
		DelCharArray(gpdfs_tgtstat_array, gntgt_stat_size*gndfs_header_size);

		//releasing fpnode buffer
		FPNODE_PAGE *pfppage, *pnext_fppage;
		pfppage = gofpnode_buf.phead;
		while(pfppage!=NULL)
		{
			pnext_fppage = pfppage->pnext;
			DelFFNodePage(pfppage);
			pfppage = pnext_fppage;
			gofpnode_buf.ntotal_pages--;
		}
		if(gofpnode_buf.ntotal_pages!=0)
			printf("Error with number of FPNode pages\n");


		DelIntArray(gpdfs_item_order_map, gnum_of_items);
	}
	else
	{
		//ScanDBBuildFPtree, which normally deletes the two maps, was not called
		delete gpattrvalue_item_map;
		delete gptgt_attrvalue_map;
		printf("No comparing item is frequent, so no hypothesis is generated.\n");
	}
	if(gnprefix_len>0)
		gotree_bufmanager.WriteCommonPrefixNode(1);

	gotree_bufmanager.Destroy();
	DelHeaderTable(pheader_table, ptgt_stat_array, gnum_of_items);
	DelIntArray(gpprefix_itemset, gnum_of_attrs);

	delete []gpAttributes;
	DecMemSize(sizeof(ATTRIBUTE)*gnum_of_attrs);
	DelCharArray(gszattr_name_buf, gnattr_name_buf_size);

	if(gntgt_attr_type==CONTINUOUS || gntgt_attr_type==CONTINUOUS_NORMAL)
	{
		delete gtgt_stat.ptgt_sum;
		DecMemSize(sizeof(TGT_SUM));
		//per-item sums exist only if the FP-tree building scan allocated them
		if(bitem_stats_allocated)
		{
			for(i=0;i<gnum_of_items;i++)
			{
				delete gpAttrValues[i].ptgt_sum;
				DecMemSize(sizeof(TGT_SUM));
			}
		}
	}
	else if(gsztarget_value[0]==0)
	{
		//the global array is only allocated when at least one target value was seen
		if(gnum_of_tgt_values>0)
			DelIntArray(gtgt_stat.ptgt_sups, gnum_of_tgt_values);
		if(bitem_stats_allocated)
		{
			for(i=0;i<gnum_of_items;i++)
				DelIntArray(gpAttrValues[i].ptgt_sups, gnum_of_tgt_values);
		}
	}
	delete []gpAttrValues;
	DecMemSize(sizeof(ATTR_VALUE)*gnum_of_items);
	DelCharArray(gszattr_value_buf, gnattr_value_buf_size);

	if(gdused_mem_size!=0)
		printf("Error: there are unreleased memory\n");

	ftime(&end);
	gdbuild_cfptree_time = end.time-start.time+(double)(end.millitm-start.millitm)/1000;

	printf("CFP-tree building time: %.3f\n", gdbuild_cfptree_time);
	printf("#frequent patterns: %d\n", gnfreq_pat_num);
	printf("#non-closed entries marked: %d\n", gnnonclosed_freq_pat_num);
	printf("#infrequent patterns on border: %d\n", gninfreq_pat_num);
	printf("\n");

	gdbuild_tree_max_mem_size = gdmax_used_mem_size;
}


void FPtree::LoadAttrValues(char *sznames_filename)
{
	FILE *fp;
	string szvalue;
	vector<string> vecAttrs, vecValues;
	char ch, szname[100];
	int nlen, i, nbuf_pos;
	map<string, int>::iterator map_it;

	gntgt_attr_no = -1;

	fp = fopen(sznames_filename, "rt");
	if(fp==NULL)
	{
		printf("Error: cannot open file %s for read\n", sznames_filename);
		return;
	}

	gnattr_name_buf_size = 0;
	ch = fgetc(fp);
	while(!feof(fp))
	{
		nlen = 0;
		while(!feof(fp) && ch!=':' && ch!='\n')
		{
			szname[nlen++] = ch;
			ch = fgetc(fp);
		}
		szname[nlen] = 0;
		if(ch!=':')
			printf("Error: ':' expected\n");
		else
			ch = fgetc(fp);

		if(nlen>0)
		{
			vecAttrs.push_back(szname);
			gnattr_name_buf_size += nlen+1;
		}

		while(!feof(fp) && (ch==' ' || ch=='\t') && ch!='\n')
			ch = fgetc(fp);
		szvalue.clear();
		while(!feof(fp) && ch!='\n')
		{
			szvalue += ch;
			ch = fgetc(fp);
		}
		vecValues.push_back(szvalue);

		if(ch=='\n')
			ch = fgetc(fp);
	}
	fclose(fp);

	gnum_of_cmp_attrs = 0;

	gnum_of_attrs = (int)vecAttrs.size();
	gpAttributes = new ATTRIBUTE[gnum_of_attrs];
	IncMemSize(sizeof(ATTRIBUTE)*gnum_of_attrs);
	gszattr_name_buf = NewCharArray(gnattr_name_buf_size);
	nbuf_pos = 0;
	for(i=0;i<gnum_of_attrs;i++)
	{
		gpAttributes[i].szattr_name = &gszattr_name_buf[nbuf_pos];
		nbuf_pos += (int)vecAttrs[i].size()+1;
		strcpy(gpAttributes[i].szattr_name, vecAttrs[i].c_str());
		gpAttributes[i].nattr_no = i;
		gpAttributes[i].nmax_freq = 0;
		gpAttributes[i].num_of_values = 0;
		gpAttributes[i].norder = -1;

		if(strncmp(vecValues[i].c_str(), "continuous-normal", 17)==0)
			gpAttributes[i].nattr_type = CONTINUOUS_NORMAL;
		else if(strncmp(vecValues[i].c_str(), "continuous", 10)==0)
			gpAttributes[i].nattr_type = CONTINUOUS;
		else if(strncmp(vecValues[i].c_str(), "nominal", 7)==0)
			gpAttributes[i].nattr_type = NOMINAL;
		else if(strncmp(vecValues[i].c_str(), "ordinal", 7)==0)
			gpAttributes[i].nattr_type = ORDINAL;
		else 
		{
			gpAttributes[i].nattr_type = -1;
			printf("Error: unknown attribute type: %s\n", vecValues[i].c_str());
		}

		if(!gpcontext_attr_map->empty())
		{
			map_it = gpcontext_attr_map->find(gpAttributes[i].szattr_name);
			if(map_it!=gpcontext_attr_map->end())
				gpAttributes[i].bis_context_attr = true;
			else
				gpAttributes[i].bis_context_attr = false;
		}
		else
			gpAttributes[i].bis_context_attr = true;

		if(!gpcomparing_attr_map->empty())
		{
			map_it = gpcomparing_attr_map->find(gpAttributes[i].szattr_name);
			if(map_it!=gpcomparing_attr_map->end())
			{
				gpAttributes[i].bis_comparing_attr = true;
				gnum_of_cmp_attrs++;
			}
			else
				gpAttributes[i].bis_comparing_attr = false;
		}
		else if(gnsignal_type!=IS_RULE || gpAttributes[i].bis_context_attr)
		{
			gpAttributes[i].bis_comparing_attr = true;
			gnum_of_cmp_attrs++;
		}
		else 
			gpAttributes[i].bis_comparing_attr = false;

		if(strcmp(gpAttributes[i].szattr_name, gsztarget_attr)==0)
		{
			gntgt_attr_no = i;
			gntgt_attr_type = gpAttributes[i].nattr_type;
		}
	}
	if(nbuf_pos!=gnattr_name_buf_size)
		printf("Error: inconsistent buffer size\n");

	if(gntgt_attr_no==-1)
		printf("Error: cannot find target attribute %s in file %s\n", gsztarget_attr, sznames_filename);
}


//sort attributes based on their types, and the following order is used: comparing & !context, comparing & context
//!comparing & context, !comparing & !context
int comp_attr(const void *e1, const void *e2)
{
	ATTRIBUTE *p1, *p2;

	p1 = (ATTRIBUTE*)e1;
	p2 = (ATTRIBUTE*)e2;

	if(p1->nattr_type < p2->nattr_type)
		return -1;
	else if(p1->nattr_type > p2->nattr_type)
		return 1;
	else if(p1->bis_comparing_attr && !p2->bis_comparing_attr)
		return -1;
	else if(!p1->bis_comparing_attr && p2->bis_comparing_attr)
		return 1;
	else if(p1->bis_comparing_attr && p2->bis_comparing_attr)
	{
		if(p1->bis_context_attr && !p2->bis_context_attr)
			return 1;
		else if(!p1->bis_context_attr && p2->bis_context_attr)
			return -1;
		else if(p1->num_of_values < p2->num_of_values)
			return -1;
		else if(p1->num_of_values > p2->num_of_values)
			return 1;
		else if(p1->nmax_freq > p2->nmax_freq)
			return -1;
		else if(p1->nmax_freq < p2->nmax_freq)
			return 1;
		else
			return 0;
	}
	else
	{
		if(p1->bis_context_attr && !p2->bis_context_attr)
			return -1;
		else if(!p1->bis_context_attr && p2->bis_context_attr)
			return 1;
		else if(p1->num_of_values < p2->num_of_values)
			return -1;
		else if(p1->num_of_values > p2->num_of_values)
			return 1;
		else if(p1->nmax_freq > p2->nmax_freq)
			return -1;
		else if(p1->nmax_freq < p2->nmax_freq)
			return 1;
		else
			return 0;
	}
}

int comp_attr_no(const void *e1, const void *e2)
{
	ATTRIBUTE *p1, *p2;

	p1 = (ATTRIBUTE*)e1;
	p2 = (ATTRIBUTE*)e2;

	if(p1->nattr_no < p2->nattr_no)
		return -1;
	else if(p1->nattr_no > p2->nattr_no)
		return 1;
	else
		return 0;
}

//here the order field refers to the order of the attributes
int comp_attrvalue(const void *e1, const void *e2)
{
	ATTR_VALUE *p1, *p2;

	p1 = (ATTR_VALUE*)e1;
	p2 = (ATTR_VALUE*)e2;

	if(p1->norder < p2->norder)
		return -1;
	else if(p1->norder > p2->norder)
		return 1;
	else if(p1->nsup > p2->nsup)
		return -1;
	else if(p1->nsup < p2->nsup)
		return 1;
	else
		return 0;
}


// count the support of the items in the database. 
// if the target attribute is nominal or ordinal and the target attribute is not specified, get the distinct values of the target attribute
int FPtree::ScanDBCountFreqItems(char* sznames_filename, char* szdata_filename)
{
	FILE *fp;
	char ch, szvalue[100], szattr_value[200];
	int nlen, nattr_no, ncapacity, nbuf_pos, i;
	ATTR_VALUE *ptemp_values;
	map<string, int> tgtvalue_sup_map;
	map<string, int>::iterator map_it;
	float dtgt_value;

	LoadAttrValues(sznames_filename);

	gndb_size = 0;

	gnattr_value_buf_size = 0;
	gnum_of_items = 0;

	fp = fopen(szdata_filename, "rt");
	if(fp==NULL)
	{
		printf("Error: cannot open file %s for read\n", szdata_filename);
		return 0;
	}

	if(gntgt_attr_type==CONTINUOUS || gntgt_attr_type==CONTINUOUS_NORMAL)
	{
		gtgt_stat.ptgt_sum = new TGT_SUM;
		IncMemSize(sizeof(TGT_SUM));
		gtgt_stat.ptgt_sum->dsum = 0;
		gtgt_stat.ptgt_sum->dsquare_sum = 0;
	}
	else if(gsztarget_value[0]!=0)
		gtgt_stat.ntgt_sup = 0;

	
	ncapacity = 2000;
	gpAttrValues = new ATTR_VALUE[ncapacity];
	IncMemSize(sizeof(ATTR_VALUE)*ncapacity);
	memset(gpAttrValues, 0, sizeof(ATTR_VALUE)*ncapacity);
	gpattrvalue_item_map = new map<string, int>;
	gptgt_attrvalue_map = new map<string, int>;

	ch = fgetc(fp);
	while(!feof(fp))
	{
		nattr_no = 0;
		while(!feof(fp) && ch!='\n')
		{
			while(!feof(fp) && ch==' ')
				ch = fgetc(fp);

			nlen = 0;
			while(!feof(fp) && ch!=',' && ch!='\t' && ch!='\n')
			{
				szvalue[nlen++] = ch;
				ch = fgetc(fp);
			}
			szvalue[nlen] = 0;
			if(nlen>0 && strcmp(szvalue, "?") && strcmp(szvalue, "ignore"))
			{
				if(nattr_no!=gntgt_attr_no)
				{
					if(gpAttributes[nattr_no].nattr_type==NOMINAL || gpAttributes[nattr_no].nattr_type==ORDINAL)
					{
						if(gpAttributes[nattr_no].bis_context_attr || gpAttributes[nattr_no].bis_comparing_attr)
						{
							sprintf(szattr_value, "%s=%s", gpAttributes[nattr_no].szattr_name, szvalue);
							map_it = gpattrvalue_item_map->find(szattr_value);
							if(map_it==gpattrvalue_item_map->end())
							{
								(*gpattrvalue_item_map)[szattr_value] = gnum_of_items;
								if(gnum_of_items>=ncapacity)
								{
									ptemp_values = new ATTR_VALUE[2*ncapacity];
									IncMemSize(sizeof(ATTR_VALUE)*2*ncapacity);
									memcpy(ptemp_values, gpAttrValues, sizeof(ATTR_VALUE)*ncapacity);
									memset(&(ptemp_values[ncapacity]), 0, sizeof(ATTR_VALUE)*ncapacity);
									delete []gpAttrValues;
									DecMemSize(sizeof(ATTR_VALUE)*ncapacity);
									gpAttrValues = ptemp_values;
									ncapacity *= 2;
								}
								gpAttrValues[gnum_of_items].nattr_no = nattr_no;
								gpAttrValues[gnum_of_items].bis_context_item = gpAttributes[nattr_no].bis_context_attr;
								gpAttrValues[gnum_of_items].bis_comparing_item = gpAttributes[nattr_no].bis_comparing_attr;
								gpAttrValues[gnum_of_items].nsup = 1;
								gnum_of_items++;
								gnattr_value_buf_size += (int)strlen(szattr_value)+1;
								gpAttributes[nattr_no].num_of_values++;
							}
							else
								gpAttrValues[map_it->second].nsup++;
						}
					}
				}
				else 
				{
					if(gntgt_attr_type==NOMINAL || gntgt_attr_type==ORDINAL)
					{
						if(gsztarget_value[0]==0)
						{
							map_it = tgtvalue_sup_map.find(szvalue);
							if(map_it==tgtvalue_sup_map.end())
								tgtvalue_sup_map[szvalue] = 1;
							else 
								map_it->second++;
						}
						else if(strcmp(szvalue, gsztarget_value)==0)
							gtgt_stat.ntgt_sup++;
					}
					else //continuous
					{
						dtgt_value = (float)atof(szvalue);
						gtgt_stat.ptgt_sum->dsum += dtgt_value;
						gtgt_stat.ptgt_sum->dsquare_sum += dtgt_value*dtgt_value;
					}
				}
			}
			nattr_no++;
			if(ch!='\n')
				ch = fgetc(fp);			
		}
		if(nattr_no!=gnum_of_attrs)
			printf("Error: inconsistent number of attributes\n");
		gndb_size++;

		if(ch=='\n')
			ch = fgetc(fp);
	}
	fclose(fp);

	ptemp_values = new ATTR_VALUE[gnum_of_items];
	IncMemSize(sizeof(ATTR_VALUE)*gnum_of_items);
	memcpy(ptemp_values, gpAttrValues, sizeof(ATTR_VALUE)*gnum_of_items);
	delete []gpAttrValues;
	DecMemSize(sizeof(ATTR_VALUE)*ncapacity);
	gpAttrValues = ptemp_values;

	gszattr_value_buf = NewCharArray(gnattr_value_buf_size);
	nbuf_pos = 0;
	for(map_it=gpattrvalue_item_map->begin();map_it!=gpattrvalue_item_map->end();map_it++)
	{
		gpAttrValues[map_it->second].szattr_value = &gszattr_value_buf[nbuf_pos];
		nbuf_pos += (int)map_it->first.size()+1;
		strcpy(gpAttrValues[map_it->second].szattr_value, map_it->first.c_str());
	}
	if(nbuf_pos!=gnattr_value_buf_size)
		printf("Error: inconsistent buffer size.\n");

	for(i=0;i<gnum_of_items;i++)
	{
		if(gpAttributes[gpAttrValues[i].nattr_no].nmax_freq<gpAttrValues[i].nsup)
			gpAttributes[gpAttrValues[i].nattr_no].nmax_freq = gpAttrValues[i].nsup;
	}
	qsort(gpAttributes, gnum_of_attrs, sizeof(ATTRIBUTE), comp_attr);
	for(i=0;i<gnum_of_attrs;i++)
		gpAttributes[i].norder = i;
	qsort(gpAttributes, gnum_of_attrs, sizeof(ATTRIBUTE), comp_attr_no);

	//here the order field refers to the order of the attributes
	for(i=0;i<gnum_of_items;i++)
		gpAttrValues[i].norder = gpAttributes[gpAttrValues[i].nattr_no].norder;
	qsort(gpAttrValues, gnum_of_items, sizeof(ATTR_VALUE), comp_attrvalue);
	OutputAttrValueItemMap(gszoutput_filename);
	

	gpattrvalue_item_map->clear();
	for(i=0;i<gnum_of_items;i++)
		(*gpattrvalue_item_map)[gpAttrValues[i].szattr_value] = i;

	gnum_of_tgt_values = (int)tgtvalue_sup_map.size();
	if(!tgtvalue_sup_map.empty())
	{
		gtgt_stat.ptgt_sups = NewIntArray(gnum_of_tgt_values, 0);
		SortNOutputTgtValueMap(gszoutput_filename, &tgtvalue_sup_map, gtgt_stat.ptgt_sups);
	}

	if(gntgt_attr_type==CONTINUOUS || gntgt_attr_type==CONTINUOUS_NORMAL)
	{
		gnfptree_tgt_stat_size = sizeof(TGT_SUM);
		gntgt_stat_size = sizeof(TGT_SUM);
	}
	else if(gsztarget_value[0]==0)
	{
		gnfptree_tgt_stat_size = sizeof(int)*gnum_of_tgt_values;
		gntgt_stat_size = sizeof(int)*gnum_of_tgt_values;
	}
	else
	{
		gnfptree_tgt_stat_size = 0;
		gntgt_stat_size = sizeof(int);
		printf("support of %s: %d\n", gsztarget_value, gtgt_stat.ntgt_sup);
	}

	//sprintf(szattr_value, "%s=%s", gsztarget_attr, gsztarget_value);

	return gnum_of_items;
}

void FPtree::OutputAttrValueItemMap(char* szoutput_name)
{
	FILE *fp;
	char szattrvalue_item_map_filename[200];
	int i;

	sprintf(szattrvalue_item_map_filename, "%s.attrvalue2item.txt", szoutput_name);
	fp = fopen(szattrvalue_item_map_filename, "wt");
	if(fp==NULL)
	{
		printf("Error: cannot open file %s for write\n", szattrvalue_item_map_filename);
		return;
	}

	for(i=0;i<gnum_of_items;i++)
		fprintf(fp, "%s\n", gpAttrValues[i].szattr_value);

	fclose(fp);
}

void FPtree::OutputItemStatis(char* szoutput_name)
{
	FILE *fp;
	char szoutput_filename[200];
	int i, j;

	sprintf(szoutput_filename, "%s.item.stat", szoutput_name);
	fp = fopen(szoutput_filename, "wt");
	if(fp==NULL)
	{
		printf("Error: cannot open file %s for write\n", szoutput_filename);
		return;
	}
	if(gntgt_attr_type==CONTINUOUS || gntgt_attr_type==CONTINUOUS_NORMAL)
	{
		double dmean, dstd_dev;

		for(i=0;i<gnum_of_items;i++)
		{
			fprintf(fp, "%d\t", i);
			fprintf(fp, "%d ", gpAttrValues[i].nsup);

			dmean = gpAttrValues[i].ptgt_sum->dsum/gpAttrValues[i].nsup;
			dstd_dev = sqrt((gpAttrValues[i].ptgt_sum->dsquare_sum-gpAttrValues[i].ptgt_sum->dsum*dmean)/gpAttrValues[i].nsup);
			fprintf(fp, "%lf  %lf\n", dmean, dstd_dev);
		}
	}
	else if(gsztarget_value[0]!=0)
	{
		for(i=0;i<gnum_of_items;i++)
		{
			fprintf(fp, "%d\t", i);
			fprintf(fp, "%d ", gpAttrValues[i].nsup);

			fprintf(fp, "%d\n", gpAttrValues[i].ntgt_sup);
		}
	}
	else 
	{
		for(i=0;i<gnum_of_items;i++)
		{
			fprintf(fp, "%d\t", i);
			fprintf(fp, "%d ", gpAttrValues[i].nsup);

			for(j=0;j<gnum_of_tgt_values;j++)
				fprintf(fp, "%d ", gpAttrValues[i].ptgt_sups[j]);
			fprintf(fp, "\n");
		}
	}

	fclose(fp);
}

void FPtree::SortNOutputTgtValueMap(char* szoutput_name, map<string, int> *ptgtvalue_sup_map, int *ptgt_sups)
{
	FILE *fp;
	char szoutput_filename[200];
	map<string, int>::iterator map_it;
	vector<string> vecValues;
	int num_of_values, i;

	sprintf(szoutput_filename, "%s.tgtvalues.map", szoutput_name);
	fp = fopen(szoutput_filename, "wt");
	if(fp==NULL)
	{
		printf("Error: cannot open file %s for write\n", szoutput_filename);
		return;
	}

	i = 0;
	for(map_it=ptgtvalue_sup_map->begin();map_it!=ptgtvalue_sup_map->end();map_it++)
	{
		vecValues.push_back(map_it->first);
		ptgt_sups[i++] = map_it->second;
	}

	num_of_values = (int)(vecValues.size());

	gptgt_attrvalue_map->clear();
	for(i=0;i<num_of_values;i++)
	{
		fprintf(fp, "%s\n", vecValues[i].c_str());
		(*gptgt_attrvalue_map)[vecValues[i].c_str()] = i;
	}

	fclose(fp);
}

// Second database scan: builds the initial FP-tree and the per-item target
// statistics.
//
// For every transaction the function
//   - translates the nominal/ordinal fields of context or comparing attributes
//     into item ids through gpattrvalue_item_map;
//   - writes all items of the transaction and its target value through
//     OutputOneTrans;
//   - if the transaction has at least one item and a target value, updates the
//     target statistics of each of its items and inserts the items that appear
//     in the header table (pitem_order_map[item]>=0), sorted by header-table
//     position, into the tree.
//
// szdata_filename: data file, same format as in the first scan.
// pheader_table:   top-level header table; its node links are extended.
// pitem_order_map: item id -> header-table position, -1 if not in the table.
// Returns the root of the tree (NULL if nothing was inserted or if the data
// file cannot be opened).
//
// Side effects: allocates gpAttrValues[i].ptgt_sum or .ptgt_sups (or resets
// .ntgt_sup), uses gptransaction as scratch space, writes the item statistics
// file, and deletes gpattrvalue_item_map and gptgt_attrvalue_map at the end.
//
// Field and item strings are held in std::string objects so that long fields
// cannot overrun a buffer. Fields beyond the declared number of attributes are
// ignored, which also bounds the number of items per transaction by
// gnum_of_attrs, the size of ptemp_trans and gptransaction.
FP_NODE* FPtree::ScanDBBuildFPtree(char* szdata_filename, HEADER_TABLE pheader_table, int *pitem_order_map)
{
	FILE *fp;
	string szvalue, szattr_value;
	int ch, nattr_no, ntrans_len, *ptemp_trans, ntemp_trans_len, i, ntgt_value;
	map<string, int>::iterator map_it;
	FP_NODE *proot;
	double dtgt_value, dsquare_value;
	//bool boutput_tdb;

	proot = NULL;
	//defined value for OutputOneTrans when the first rows carry no target value.
	//NOTE(review): a later row without a target value still reports the value
	//of the previous row, as before -- confirm what OutputOneTrans expects.
	dtgt_value = 0;

	//if(gncorrection_method==PERMUTATION || gncorrection_method==PERMUTATION_SUPERSET || 
	//	gncorrection_method==SIMULATED_PERM_HYBRID || gnoutput_mode==OUTPUT_REPRESENTATIVE && gdmax_local_pvalue<1
	//	|| gnsignal_type==IS_HYPOTHESIS && gntgt_attr_type==CONTINUOUS && gnmin_sup<CLT_THRESHOLD || gbgen_tid_list==true)
	//	boutput_tdb = true;
	//else
	//	boutput_tdb = false;

	fp = fopen(szdata_filename, "rt");
	if(fp==NULL)
	{
		printf("Error: cannot open file %s for read\n", szdata_filename);
		return NULL;
	}

	//if(boutput_tdb)
	OpenTdbFiles(gszoutput_filename);

	//per-item target statistics, in the form matching the target type
	if(gntgt_attr_type==CONTINUOUS || gntgt_attr_type==CONTINUOUS_NORMAL)
	{
		for(i=0;i<gnum_of_items;i++)
		{
			gpAttrValues[i].ptgt_sum = new TGT_SUM;
			IncMemSize(sizeof(TGT_SUM));
			gpAttrValues[i].ptgt_sum->dsum = 0;
			gpAttrValues[i].ptgt_sum->dsquare_sum = 0;
		}
	}
	else if(gsztarget_value[0]==0)
	{
		for(i=0;i<gnum_of_items;i++)
			gpAttrValues[i].ptgt_sups = NewIntArray(gnum_of_tgt_values, 0);
	}
	else
	{
		for(i=0;i<gnum_of_items;i++)
			gpAttrValues[i].ntgt_sup = 0;
	}

	ptemp_trans = NewIntArray(gnum_of_attrs);

	ch = fgetc(fp);
	while(!feof(fp))
	{
		//one transaction per line
		nattr_no = 0;
		ntrans_len = 0;
		ntemp_trans_len = 0;
		ntgt_value = -1;	//stays negative when the row has no usable target value
		while(!feof(fp) && ch!='\n')
		{
			while(!feof(fp) && ch==' ')
				ch = fgetc(fp);

			szvalue.clear();
			while(!feof(fp) && ch!=',' && ch!='\t' && ch!='\n')
			{
				szvalue += (char)ch;
				ch = fgetc(fp);
			}

			//surplus fields are ignored; missing values are skipped
			if(nattr_no<gnum_of_attrs && !szvalue.empty() && szvalue!="?" && szvalue!="ignore")
			{
				if(nattr_no==gntgt_attr_no)
				{
					if(gntgt_attr_type==NOMINAL || gntgt_attr_type==ORDINAL)
					{
						if(gsztarget_value[0]==0)
						{
							map_it = gptgt_attrvalue_map->find(szvalue);
							if(map_it==gptgt_attrvalue_map->end())
								printf("Error: cannot find value of the target attribute in the map: %s\n", szvalue.c_str());
							else
								ntgt_value = map_it->second;
						}
						else
						{
							if(strcmp(szvalue.c_str(), gsztarget_value)==0)
								ntgt_value = 1;
							else
								ntgt_value = 0;
						}
					}
					else
					{
						ntgt_value = 0;
						dtgt_value = atof(szvalue.c_str());
					}
				}
				else if(gpAttributes[nattr_no].nattr_type==NOMINAL || gpAttributes[nattr_no].nattr_type==ORDINAL)
				{
					if(gpAttributes[nattr_no].bis_context_attr || gpAttributes[nattr_no].bis_comparing_attr)
					{
						szattr_value = gpAttributes[nattr_no].szattr_name;
						szattr_value += '=';
						szattr_value += szvalue;
						map_it = gpattrvalue_item_map->find(szattr_value);
						if(map_it==gpattrvalue_item_map->end())
							printf("Error: cannot find attribute value %s in the map\n", szattr_value.c_str());
						else
						{
							ptemp_trans[ntemp_trans_len++] = map_it->second;
							if(pitem_order_map[map_it->second]>=0) //frequent item only 
								gptransaction[ntrans_len++] = pitem_order_map[map_it->second];
						}
					}
				}
			}
			nattr_no++;
			if(ch!='\n')
				ch = fgetc(fp);
		}

		//if(boutput_tdb)
		//{
		if(gntgt_attr_type==NOMINAL || gntgt_attr_type==ORDINAL)
			OutputOneTrans(ptemp_trans, ntemp_trans_len, &ntgt_value);
		else
			OutputOneTrans(ptemp_trans, ntemp_trans_len, &dtgt_value);
		//}

		if(ntemp_trans_len>0 && ntgt_value>=0)
		{
			//the tree expects the items in ascending header-table position
			if(ntrans_len>1)
				qsort(gptransaction, ntrans_len, sizeof(int), comp_int_asc);
			if(gntgt_attr_type==NOMINAL || gntgt_attr_type==ORDINAL)
			{
				if(gsztarget_value[0]==0)
				{
					for(i=0;i<ntemp_trans_len;i++)
						gpAttrValues[ptemp_trans[i]].ptgt_sups[ntgt_value]++;
					if(ntrans_len>0)
						InsertTransaction(proot, pheader_table, gptransaction, ntrans_len, ntgt_value);
				}
				else
				{
					if(ntgt_value==1)
					{
						for(i=0;i<ntemp_trans_len;i++)
							gpAttrValues[ptemp_trans[i]].ntgt_sup++;
					}
					if(ntrans_len>0)
						InsertTransaction(proot, pheader_table, gptransaction, ntrans_len, 1, ntgt_value);
				}
			}
			else
			{
				dsquare_value = dtgt_value*dtgt_value;
				for(i=0;i<ntemp_trans_len;i++)
				{
					gpAttrValues[ptemp_trans[i]].ptgt_sum->dsum += dtgt_value;
					gpAttrValues[ptemp_trans[i]].ptgt_sum->dsquare_sum += dsquare_value;
				}
				if(ntrans_len>0)
					InsertTransaction(proot, pheader_table, gptransaction, ntrans_len, 1, dtgt_value, dsquare_value);
			}
		}

		if(ch=='\n')
			ch = fgetc(fp);
	}
	fclose(fp);

	//if(boutput_tdb)
	CloseTdbFiles();
	OutputItemStatis(gszoutput_filename);

	DelIntArray(ptemp_trans, gnum_of_attrs);

	//the string maps are not needed once the database has been encoded
	delete gpattrvalue_item_map;
	delete gptgt_attrvalue_map;

	return proot;
}


//-------------------------------------  FP-tree ------------------------------------------
//Insert a transaction into the FP-tree (continuous target attribute).
//
// proot:         root of the tree (first node of the top-level sibling list);
//                updated when the new node becomes the first top-level node.
// pheader_table: header table; every new node is pushed onto the front of the
//                node-link list (pconddb) of its item.
// ptransaction:  header-table positions of the items, in ascending order.
// length:        number of entries in ptransaction.
// frequency:     number of occurrences represented by this transaction.
// dsum, dsquare_sum: sum and sum of squares of the target values of these
//                occurrences; added to every node on the path.
//
// Sibling lists are kept sorted by nitem_order. The shared prefix of the path
// is updated in place; from the first item without a matching node on, a chain
// of new nodes is created for all remaining items.
void FPtree::InsertTransaction(FP_NODE* &proot, HEADER_TABLE pheader_table, int* ptransaction, int length, int frequency, double dsum, double dsquare_sum)
{
	FP_NODE *pwalk = proot;		//candidate node on the current level
	FP_NODE *pabove = NULL;		//last node of the path so far
	FP_NODE *pprev_sibling = NULL;	//sibling immediately left of pwalk
	FP_NODE *pfresh;
	int pos, k;

	for(pos=0;pos<length;pos++)
	{
		//advance along the sorted sibling list
		for(;pwalk!=NULL && pwalk->nitem_order<ptransaction[pos];pwalk=pwalk->prightsibling)
			pprev_sibling = pwalk;

		if(pwalk!=NULL && pwalk->nitem_order==ptransaction[pos])
		{
			//existing node: accumulate and descend one level
			pwalk->frequency += frequency;
			pwalk->tgt_union.ptgt_sum->dsum += dsum;
			pwalk->tgt_union.ptgt_sum->dsquare_sum += dsquare_sum;
			pabove = pwalk;
			pwalk = pwalk->pchild;
			pprev_sibling = NULL;
			continue;
		}

		//no matching node: append a new branch for items pos..length-1
		for(k=pos;k<length;k++)
		{
			pfresh = NewOneFPNode();
			pfresh->nitem_order = ptransaction[k];
			pfresh->frequency = frequency;
			pfresh->tgt_union.ptgt_sum->dsum = dsum;
			pfresh->tgt_union.ptgt_sum->dsquare_sum = dsquare_sum;
			pfresh->pchild = NULL;
			pfresh->pparent = pabove;
			if(k==pos)
			{
				//the first new node is spliced into the sibling list
				pfresh->prightsibling = pwalk;
				if(pprev_sibling!=NULL)
					pprev_sibling->prightsibling = pfresh;
				else if(pabove!=NULL)
					pabove->pchild = pfresh;
			}
			else
			{
				//later nodes are only children
				pabove->pchild = pfresh;
				pfresh->prightsibling = NULL;
			}
			pfresh->pnode_link = pheader_table[pfresh->nitem_order].pconddb;
			pheader_table[pfresh->nitem_order].pconddb = pfresh;
			if(k==0 && pprev_sibling==NULL)
				proot = pfresh;
			pabove = pfresh;
		}
		return;
	}
}

//Insert a transaction into the FP-tree; the target attribute is nominal or
//ordinal, and the target attribute value is given.
//
// proot:         root of the tree (first node of the top-level sibling list);
//                updated when the new node becomes the first top-level node.
// pheader_table: header table; every new node is pushed onto the front of the
//                node-link list (pconddb) of its item.
// ptransaction:  header-table positions of the items, in ascending order.
// length:        number of entries in ptransaction.
// frequency:     number of occurrences represented by this transaction.
// ntgt_sup:      how many of these occurrences carry the given target value;
//                added to every node on the path.
//
// Sibling lists are kept sorted by nitem_order. The shared prefix of the path
// is updated in place; from the first item without a matching node on, a chain
// of new nodes is created for all remaining items.
void FPtree::InsertTransaction(FP_NODE* &proot, HEADER_TABLE pheader_table, int* ptransaction, int length, int frequency, int ntgt_sup)
{
	FP_NODE *pwalk = proot;		//candidate node on the current level
	FP_NODE *pabove = NULL;		//last node of the path so far
	FP_NODE *pprev_sibling = NULL;	//sibling immediately left of pwalk
	FP_NODE *pfresh;
	int pos, k;

	for(pos=0;pos<length;pos++)
	{
		//advance along the sorted sibling list
		for(;pwalk!=NULL && pwalk->nitem_order<ptransaction[pos];pwalk=pwalk->prightsibling)
			pprev_sibling = pwalk;

		if(pwalk!=NULL && pwalk->nitem_order==ptransaction[pos])
		{
			//existing node: accumulate and descend one level
			pwalk->frequency += frequency;
			pwalk->tgt_union.ntgt_sup += ntgt_sup;
			pabove = pwalk;
			pwalk = pwalk->pchild;
			pprev_sibling = NULL;
			continue;
		}

		//no matching node: append a new branch for items pos..length-1
		for(k=pos;k<length;k++)
		{
			pfresh = NewOneFPNode();
			pfresh->nitem_order = ptransaction[k];
			pfresh->frequency = frequency;
			pfresh->tgt_union.ntgt_sup = ntgt_sup;
			pfresh->pchild = NULL;
			pfresh->pparent = pabove;
			if(k==pos)
			{
				//the first new node is spliced into the sibling list
				pfresh->prightsibling = pwalk;
				if(pprev_sibling!=NULL)
					pprev_sibling->prightsibling = pfresh;
				else if(pabove!=NULL)
					pabove->pchild = pfresh;
			}
			else
			{
				//later nodes are only children
				pabove->pchild = pfresh;
				pfresh->prightsibling = NULL;
			}
			pfresh->pnode_link = pheader_table[pfresh->nitem_order].pconddb;
			pheader_table[pfresh->nitem_order].pconddb = pfresh;
			if(k==0 && pprev_sibling==NULL)
				proot = pfresh;
			pabove = pfresh;
		}
		return;
	}
}

//Insert a transaction into the FP-tree; the target attribute is nominal or
//ordinal, but no target attribute value is given, so every node carries one
//support count per distinct target value.
//
// proot:         root of the tree (first node of the top-level sibling list);
//                updated when the new node becomes the first top-level node.
// pheader_table: header table; every new node is pushed onto the front of the
//                node-link list (pconddb) of its item.
// ptransaction:  header-table positions of the items, in ascending order.
// length:        number of entries in ptransaction.
// frequency:     number of occurrences represented by this transaction.
// ptgt_sups:     gnum_of_tgt_values support counts, added element-wise to
//                every node on the path.
//
// Sibling lists are kept sorted by nitem_order. The shared prefix of the path
// is updated in place; from the first item without a matching node on, a chain
// of new nodes is created for all remaining items.
void FPtree::InsertTransaction(FP_NODE* &proot, HEADER_TABLE pheader_table, int* ptransaction, int length, int frequency, int* ptgt_sups)
{
	FP_NODE *pwalk = proot;		//candidate node on the current level
	FP_NODE *pabove = NULL;		//last node of the path so far
	FP_NODE *pprev_sibling = NULL;	//sibling immediately left of pwalk
	FP_NODE *pfresh;
	int pos, k, v;

	for(pos=0;pos<length;pos++)
	{
		//advance along the sorted sibling list
		for(;pwalk!=NULL && pwalk->nitem_order<ptransaction[pos];pwalk=pwalk->prightsibling)
			pprev_sibling = pwalk;

		if(pwalk!=NULL && pwalk->nitem_order==ptransaction[pos])
		{
			//existing node: accumulate and descend one level
			pwalk->frequency += frequency;
			for(v=0;v<gnum_of_tgt_values;v++)
				pwalk->tgt_union.ptgt_sups[v] += ptgt_sups[v];
			pabove = pwalk;
			pwalk = pwalk->pchild;
			pprev_sibling = NULL;
			continue;
		}

		//no matching node: append a new branch for items pos..length-1
		for(k=pos;k<length;k++)
		{
			pfresh = NewOneFPNode();
			pfresh->nitem_order = ptransaction[k];
			pfresh->frequency = frequency;
			memcpy(pfresh->tgt_union.ptgt_sups, ptgt_sups, sizeof(int)*gnum_of_tgt_values);
			pfresh->pchild = NULL;
			pfresh->pparent = pabove;
			if(k==pos)
			{
				//the first new node is spliced into the sibling list
				pfresh->prightsibling = pwalk;
				if(pprev_sibling!=NULL)
					pprev_sibling->prightsibling = pfresh;
				else if(pabove!=NULL)
					pabove->pchild = pfresh;
			}
			else
			{
				//later nodes are only children
				pabove->pchild = pfresh;
				pfresh->prightsibling = NULL;
			}
			pfresh->pnode_link = pheader_table[pfresh->nitem_order].pconddb;
			pheader_table[pfresh->nitem_order].pconddb = pfresh;
			if(k==0 && pprev_sibling==NULL)
				proot = pfresh;
			pabove = pfresh;
		}
		return;
	}
}

//Insert one transaction read from the original database into the FP-tree rooted at proot.
//Overload used when the target attribute is nominal or ordinal, but no target attribute value is given:
//the transaction counts once (frequency 1) and once for its own target value ntgt_value.
//  proot         - first node of the top-level sibling list; updated when a new node becomes the first top-level node
//  pheader_table - per-item table; every new node is pushed onto the front of its item's pconddb list (chained through pnode_link)
//  ptransaction  - item orders of the transaction, length entries
//  ntgt_value    - index into tgt_union.ptgt_sups of the counter to increment
void FPtree::InsertTransaction(FP_NODE* &proot, HEADER_TABLE pheader_table, int* ptransaction, int length, int ntgt_value)
{
	FP_NODE *pmatch, *pprev_sib, *pancestor, *pfresh;
	int npos, nrest;

	pmatch = proot;
	pancestor = NULL;
	pprev_sib = NULL;
	for(npos=0;npos<length;npos++)
	{
		//siblings are kept in ascending nitem_order: step over the smaller ones
		for(;pmatch!=NULL && pmatch->nitem_order<ptransaction[npos];pmatch=pmatch->prightsibling)
			pprev_sib = pmatch;

		if(pmatch!=NULL && pmatch->nitem_order==ptransaction[npos])
		{
			//the path already contains this item: count the transaction and descend one level
			pmatch->frequency++;
			pmatch->tgt_union.ptgt_sups[ntgt_value]++;
			pancestor = pmatch;
			pmatch = pmatch->pchild;
			pprev_sib = NULL;
			continue;
		}

		//no node for this item at this level: the rest of the transaction becomes a fresh chain
		for(nrest=npos;nrest<length;nrest++)
		{
			pfresh = NewOneFPNode();
			pfresh->nitem_order = ptransaction[nrest];
			pfresh->frequency = 1;
			memset(pfresh->tgt_union.ptgt_sups, 0, sizeof(int)*gnum_of_tgt_values);
			pfresh->tgt_union.ptgt_sups[ntgt_value] = 1;
			pfresh->pchild = NULL;
			pfresh->pparent = pancestor;
			if(nrest==npos)
			{
				//head of the chain: splice it into the sibling list in front of pmatch
				pfresh->prightsibling = pmatch;
				if(pprev_sib!=NULL)
					pprev_sib->prightsibling = pfresh;
				else if(pancestor!=NULL)
					pancestor->pchild = pfresh;
				if(npos==0 && pprev_sib==NULL)
					proot = pfresh;
			}
			else
			{
				//every later node is the only child of the node created just before it
				pfresh->prightsibling = NULL;
				pancestor->pchild = pfresh;
			}
			pfresh->pnode_link = pheader_table[pfresh->nitem_order].pconddb;
			pheader_table[pfresh->nitem_order].pconddb = pfresh;
			pancestor = pfresh;
		}
		break;
	}
}

//Accumulate, for every ancestor of every node of item nitem_order, the support and target
//statistics of that node. The results are added to the existing contents of the output arrays.
//  pheader_table       - header table of the current tree; pconddb of entry nitem_order heads the item's node list
//  nitem_order         - index of the item whose nodes are visited
//  pitem_sup_map       - output, indexed by ancestor nitem_order; nsupport is incremented
//  pnew_tgt_stat_array - output target statistics, indexed by ancestor nitem_order; interpreted as
//                        TGT_SUM entries (continuous target), blocks of gnum_of_tgt_values ints
//                        (no target value given) or one int per item (target value given)
void FPtree::CountFreqItems(HEADER_TABLE pheader_table, int nitem_order, HEADER_NODE *pitem_sup_map, char* pnew_tgt_stat_array)
{
	FP_NODE *poccurrence, *pancestor;
	int t, nanc_order;

	if(gntgt_attr_type==CONTINUOUS || gntgt_attr_type==CONTINUOUS_NORMAL)
	{
		//continuous target: sum and sum of squares per item
		TGT_SUM *psum_stats = (TGT_SUM*)pnew_tgt_stat_array;

		for(poccurrence=pheader_table[nitem_order].pconddb;poccurrence!=NULL;poccurrence=poccurrence->pnode_link)
		{
			for(pancestor=poccurrence->pparent;pancestor!=NULL;pancestor=pancestor->pparent)
			{
				nanc_order = pancestor->nitem_order;
				pitem_sup_map[nanc_order].nsupport += poccurrence->frequency;
				psum_stats[nanc_order].dsum += poccurrence->tgt_union.ptgt_sum->dsum;
				psum_stats[nanc_order].dsquare_sum += poccurrence->tgt_union.ptgt_sum->dsquare_sum;
			}
		}
	}
	else if(gsztarget_value[0]==0)
	{
		//no target value given: one counter per target value and item
		int *pvalue_sups = (int*)pnew_tgt_stat_array;

		for(poccurrence=pheader_table[nitem_order].pconddb;poccurrence!=NULL;poccurrence=poccurrence->pnode_link)
		{
			for(pancestor=poccurrence->pparent;pancestor!=NULL;pancestor=pancestor->pparent)
			{
				nanc_order = pancestor->nitem_order;
				pitem_sup_map[nanc_order].nsupport += poccurrence->frequency;
				for(t=0;t<gnum_of_tgt_values;t++)
					pvalue_sups[nanc_order*gnum_of_tgt_values+t] += poccurrence->tgt_union.ptgt_sups[t];
			}
		}
	}
	else
	{
		//a target value is given: a single counter per item
		int *ptgt_sups = (int*)pnew_tgt_stat_array;

		for(poccurrence=pheader_table[nitem_order].pconddb;poccurrence!=NULL;poccurrence=poccurrence->pnode_link)
		{
			for(pancestor=poccurrence->pparent;pancestor!=NULL;pancestor=pancestor->pparent)
			{
				nanc_order = pancestor->nitem_order;
				pitem_sup_map[nanc_order].nsupport += poccurrence->frequency;
				ptgt_sups[nanc_order] += poccurrence->tgt_union.ntgt_sup;
			}
		}
	}
}

//Debugging aid: prints one error line for every element of ptransaction[0..ntrans_len-1]
//that is smaller than its predecessor. The array itself is never modified.
void CheckOrder(int *ptransaction, int ntrans_len)
{
	int npos = 0;

	while(++npos<ntrans_len)
	{
		if(ptransaction[npos-1]>ptransaction[npos])
			printf("Error: wrong order in transaction\n");
	}
}


//Build a conditional FP-tree from the node list pconddb (chained through pnode_link).
//For every node in the list, the path from its parent up to the top of the tree is translated
//through pitem_order_map (entries <0 are dropped) and inserted as one transaction weighted with
//the node's frequency and target statistics. Paths that keep fewer than two items are not inserted.
//  pconddb          - first node of the item's node list in the current tree
//  pnewheader_table - header table of the tree being built; receives the node links of the new tree
//  pitem_order_map  - maps an item order of the current tree to its order in the new tree, or a negative value
//Returns the first top-level node of the new tree, or NULL when nothing was inserted.
FP_NODE* FPtree::BuildNewFPTree(FP_NODE *pconddb, HEADER_TABLE pnewheader_table, int *pitem_order_map)
{
	FP_NODE *ptree_root, *poccurrence, *pancestor;
	int nfirst, nmapped, npath_len, *ppath;

	ptree_root = NULL;

	for(poccurrence=pconddb;poccurrence!=NULL;poccurrence=poccurrence->pnode_link)
	{
		//the walk goes from the leaf towards the top, so gptransaction is filled from its end backwards;
		//the kept items then appear in top-down order starting at index nfirst
		nfirst = gnum_of_attrs;
		for(pancestor=poccurrence->pparent;pancestor!=NULL;pancestor=pancestor->pparent)
		{
			nmapped = pitem_order_map[pancestor->nitem_order];
			if(nmapped>=0)
				gptransaction[--nfirst] = nmapped;
		}

		npath_len = gnum_of_attrs-nfirst;
		if(npath_len<=1)
			continue;
		ppath = &(gptransaction[nfirst]);

		//CheckOrder(ppath, npath_len);
		//pick the InsertTransaction overload that matches the kind of target statistics stored in the nodes
		if(gntgt_attr_type==CONTINUOUS || gntgt_attr_type==CONTINUOUS_NORMAL)
			InsertTransaction(ptree_root, pnewheader_table, ppath, npath_len, poccurrence->frequency, poccurrence->tgt_union.ptgt_sum->dsum, poccurrence->tgt_union.ptgt_sum->dsquare_sum);
		else if(gsztarget_value[0]==0)
			InsertTransaction(ptree_root, pnewheader_table, ppath, npath_len, poccurrence->frequency, poccurrence->tgt_union.ptgt_sups);
		else
			InsertTransaction(ptree_root, pnewheader_table, ppath, npath_len, poccurrence->frequency, poccurrence->tgt_union.ntgt_sup);
	}

	return ptree_root;
}



//Recursive depth-first mining step over one (conditional) FP-tree.
//For every entry k of pheader_table, in table order, the item is appended to the current prefix
//(gpprefix_itemset/gnprefix_len), the resulting pattern is handed to gotree_bufmanager, and, when
//item k occurs in more than one tree node, the items co-occurring with it are counted, a conditional
//FP-tree is built from them and the function calls itself on that tree. All global state touched
//for item k (prefix length, depth, item-kind counters) is restored before moving on to k+1.
//  proot           - root of the tree described by pheader_table; not referenced in this function body
//  pheader_table   - header table of the current tree (nitem, nsupport, pconddb per entry)
//  ptgt_stat_array - target statistics parallel to pheader_table, gntgt_stat_size bytes per entry
//  num_of_items    - number of entries of pheader_table to process
void FPtree::DepthFGGrowth(FP_NODE *proot, HEADER_TABLE pheader_table, char* ptgt_stat_array, int num_of_items)
{
	HEADER_TABLE pnewheader_table;
	FP_NODE *pnewroot, *pfpnode;
	FPNODE_PAGE *pstart_page;
	int k, i, nstart_pos, nitem, nlen, num_of_new_items;
	char *pnew_tgt_stat_array;

	//statistics: number of invocations
	gntotal_call++;

	//scratch header table and target statistics for the conditional trees; allocated once per call and
	//reused for every k (pnew_tgt_stat_array is presumably set by NewHeaderTable through a reference parameter)
	pnewheader_table = NewHeaderTable(num_of_items, pnew_tgt_stat_array);

	for(k=0;k<num_of_items;k++)
	{
		//extend the current prefix with item k
		gpprefix_itemset[gnprefix_len] = pheader_table[k].nitem;
		gnprefix_len++;
		gndepth++;

		//keep track of how many non-context items of each kind the prefix contains
		if(!gpAttrValues[pheader_table[k].nitem].bis_context_item)
		{
			if(gpAttrValues[pheader_table[k].nitem].bis_comparing_item)
				gnnon_context_comp_item_num++;
			else
				gnother_item_num++;
		}

//if(gnprefix_len==7 && gpprefix_itemset[1]==107 && gpprefix_itemset[2]==11 && gpprefix_itemset[3]==95 &&
//   gpprefix_itemset[4]==1 && gpprefix_itemset[5]==14 && gpprefix_itemset[6]==67)
//	printf("stop\n");


		//when the path contains one non-context comparing item, then the candidate extensions of the pattern are 
		//all non-context comparing items. When gnmaterialization_mode==FREQ_ONLY, then each path can contain at most
		//one non-context item and it must be a comparing item; when gnmaterialization_mode==FREQ_N_INFREQ_BORDER, then each
		//path should contain at most two non-context items
		if(gnmaterialization_mode==FREQ_ONLY && gnnon_context_comp_item_num>=1 || 
			gnmaterialization_mode==FREQ_N_INFREQ_BORDER && gnnon_context_comp_item_num+gnother_item_num>=2)
		{
			//case 1: the prefix has reached its limit of non-context items and is not extended any further.
			//Sanity check: every item before k in the header table must be a non-context comparing item.
			for(i=0;i<k;i++)
			{
				if(gpAttrValues[pheader_table[i].nitem].bis_context_item || !gpAttrValues[pheader_table[i].nitem].bis_comparing_item)
					printf("Error: the item should be a non-context comparing item\n");
			}
			if(pheader_table[k].nsupport>=gnmin_sup)
				gotree_bufmanager.PruneNonClosed(gpprefix_itemset, gnprefix_len, pheader_table[k].nsupport, 1);
		}
		else if(pheader_table[k].pconddb==NULL)
		{
			//case 2: item k has no node in the tree, so there is nothing to extend the prefix with
			if(pheader_table[k].nsupport>=gnmin_sup)
				gotree_bufmanager.PruneNonClosed(gpprefix_itemset, gnprefix_len, pheader_table[k].nsupport, 1);
		}
		else //if(pheader_table[k].pconddb!=NULL)
		{
			if(pheader_table[k].pconddb->pnode_link==NULL)
			{
				//case 3: item k occurs in exactly one node, so its conditional database is the single
				//path from that node's parent to the top of the tree; no conditional tree is built
				nlen = 0;
				pfpnode = pheader_table[k].pconddb->pparent;
				while(pfpnode!=NULL)
				{
					nitem = pheader_table[pfpnode->nitem_order].nitem;
					if(gpAttrValues[nitem].bis_context_item || gpAttrValues[nitem].bis_comparing_item)
					{
						//the candidate is written just behind the prefix (gnprefix_len itself is unchanged)
						//so that IsOnBorder can be asked about prefix+candidate, i.e. gnprefix_len+1 items
						gpprefix_itemset[gnprefix_len] = nitem;
						if(gnmaterialization_mode==FREQ_ONLY && pheader_table[k].pconddb->frequency>=gnmin_sup || 
							gnmaterialization_mode==FREQ_N_INFREQ_BORDER && (pheader_table[k].nsupport>=gnmin_sup || pheader_table[pfpnode->nitem_order].nsupport>=gnmin_sup ||
							gnprefix_len-gnfirst_level_len>1 && gotree_bufmanager.IsOnBorder(gpprefix_itemset, gnprefix_len+1, pheader_table[k].pconddb->frequency)))
							gpsingle_branch[nlen++] = nitem; //pheader_table[pfpnode->nitem_order].nitem;
					}
					pfpnode = pfpnode->pparent;
				}
				if(nlen>0)
				{
					//emit the collected items as one leaf, with the target statistics of the single node
					//in the representation selected by the target attribute type
					if(gntgt_attr_type==CONTINUOUS || gntgt_attr_type==CONTINUOUS_NORMAL)
						gotree_bufmanager.WriteLeafNode(gpsingle_branch, nlen, pheader_table[k].pconddb->frequency, (char*)pheader_table[k].pconddb->tgt_union.ptgt_sum);
					else if(gsztarget_value[0]==0)
						gotree_bufmanager.WriteLeafNode(gpsingle_branch, nlen, pheader_table[k].pconddb->frequency, (char*)pheader_table[k].pconddb->tgt_union.ptgt_sups);
					else
						gotree_bufmanager.WriteLeafNode(gpsingle_branch, nlen, pheader_table[k].pconddb->frequency, (char*)&pheader_table[k].pconddb->tgt_union.ntgt_sup);
					//place the branch behind the prefix; the calls below pass gnprefix_len+nlen explicitly
					memcpy(&gpprefix_itemset[gnprefix_len], gpsingle_branch, sizeof(int)*nlen);
				}
				if(pheader_table[k].nsupport>=gnmin_sup)
				{
					if(pheader_table[k].nsupport>pheader_table[k].pconddb->frequency)
					{
						//the prefix has a larger support than the single node: report the prefix on its own,
						//and the prefix plus branch only if the node's frequency reaches gnmin_sup as well
						gotree_bufmanager.PruneNonClosed(gpprefix_itemset, gnprefix_len, pheader_table[k].nsupport, 1);
						if(nlen>0 && pheader_table[k].pconddb->frequency>=gnmin_sup)							
							gotree_bufmanager.PruneNonClosed(gpprefix_itemset, gnprefix_len+nlen, pheader_table[k].pconddb->frequency, 0);
					}
					else if(nlen>0)
						gotree_bufmanager.PruneNonClosed(gpprefix_itemset, gnprefix_len+nlen, pheader_table[k].nsupport, 0);
					else
						gotree_bufmanager.PruneNonClosed(gpprefix_itemset, gnprefix_len, pheader_table[k].nsupport, 1);
				}
			}
			else
			{
				//case 4: item k occurs in several nodes
				//count frequent items from FP-tree
				//only entries 0..k-1 are cleared before counting
				//NOTE(review): this presumably relies on every ancestor of an item-k node having an order below k - confirm
				memset(pnewheader_table, 0, sizeof(HEADER_NODE)*k);
				memset(pnew_tgt_stat_array, 0, gntgt_stat_size*k);
				CountFreqItems(pheader_table, k, pnewheader_table, pnew_tgt_stat_array);

				num_of_new_items = 0;
				//num_of_freq_comp_items = 0;
				nlen = 0;
				//split the counted items into the common prefix (gpsingle_branch) and the items of the
				//conditional tree, which are compacted to the front of pnewheader_table
				for(i=0;i<k;i++)
				{
					if((gpAttrValues[pheader_table[i].nitem].bis_context_item || 
						gpAttrValues[pheader_table[i].nitem].bis_comparing_item) && pnewheader_table[i].nsupport>0)
					{
						//candidate written behind the prefix so that IsOnBorder sees gnprefix_len+1 items
						gpprefix_itemset[gnprefix_len] = pheader_table[i].nitem;
						if(gnmaterialization_mode==FREQ_ONLY && pnewheader_table[i].nsupport>=gnmin_sup || 
							gnmaterialization_mode==FREQ_N_INFREQ_BORDER && (pheader_table[k].nsupport>=gnmin_sup || 
							pheader_table[i].nsupport>=gnmin_sup ||	gnprefix_len-gnfirst_level_len>1 && 
							gotree_bufmanager.IsOnBorder(gpprefix_itemset, gnprefix_len+1, pnewheader_table[i].nsupport)))
						{
							//an item whose support in item k's conditional database equals item k's own
							//support goes to the common prefix instead of the conditional tree
							if(pnewheader_table[i].nsupport==pheader_table[k].nsupport)
								gpsingle_branch[nlen++] = pheader_table[i].nitem;
							else
							{
								//in-place compaction: num_of_new_items<=i always holds, so no entry that
								//still has to be read is overwritten
								pnewheader_table[num_of_new_items].nitem = pheader_table[i].nitem;
								pnewheader_table[num_of_new_items].nsupport = pnewheader_table[i].nsupport;
								pnewheader_table[num_of_new_items].order = i;
								memcpy(&pnew_tgt_stat_array[num_of_new_items*gntgt_stat_size], &pnew_tgt_stat_array[i*gntgt_stat_size], gntgt_stat_size);
								num_of_new_items++;
								//if(gpAttrValues[pheader_table[i].nitem].bis_comparing_item && pnewheader_table[i].nsupport>=gnmin_sup)
								//	num_of_freq_comp_items++;
							}
						}
					}
					//default: item i does not take part in the conditional tree (overwritten below for kept items)
					gpdfs_item_order_map[i] = -1;
				}

				if(nlen>0)
				{
					//the common prefix items become part of the current prefix for the rest of this iteration
					gotree_bufmanager.InsertCommonPrefixNode(gpsingle_branch, nlen, pheader_table[k].nsupport, &ptgt_stat_array[k*gntgt_stat_size]);
					memcpy(&gpprefix_itemset[gnprefix_len], gpsingle_branch, sizeof(int)*nlen);
					gnprefix_len += nlen;
				}
				if(pheader_table[k].nsupport>=gnmin_sup)
				{
					if(nlen>0)
						gotree_bufmanager.PruneNonClosed(gpprefix_itemset, gnprefix_len, pheader_table[k].nsupport, 0);
					else
						gotree_bufmanager.PruneNonClosed(gpprefix_itemset, gnprefix_len, pheader_table[k].nsupport, 1);
				}

				if(num_of_new_items==1)
				{
					//a single remaining item is written as a leaf; no conditional tree is needed
					if(pnewheader_table[0].nsupport>=gnmin_sup)
					{
						gpprefix_itemset[gnprefix_len] = pnewheader_table[0].nitem;
						gotree_bufmanager.PruneNonClosed(gpprefix_itemset, gnprefix_len+1, pnewheader_table[0].nsupport, 0);
					}
					gotree_bufmanager.WriteLeafNode(&pnewheader_table[0].nitem, 1, pnewheader_table[0].nsupport, pnew_tgt_stat_array);
				}
				else if(num_of_new_items>1)
				{
					gotree_bufmanager.InsertInternalNode(pnewheader_table, pnew_tgt_stat_array, num_of_new_items);

					//recurse only while the depth limit derived from gnmax_len is not reached
					if(gndepth<gnmax_len+1)
					{
						//map the old order of every kept item to its index in the compacted table;
						//all other entries stay -1 and are dropped by BuildNewFPTree
						for(i=0;i<num_of_new_items;i++)
							gpdfs_item_order_map[pnewheader_table[i].order] = i;

						//remember the current position of the node buffer, build the conditional tree, mine it,
						//then Reset to the remembered position (presumably releasing the nodes of the conditional tree)
						pstart_page = gofpnode_buf.pcur_page;
						nstart_pos = gofpnode_buf.ncur_pos;
						pnewroot = BuildNewFPTree(pheader_table[k].pconddb, pnewheader_table, gpdfs_item_order_map);

						DepthFGGrowth(pnewroot, pnewheader_table, pnew_tgt_stat_array, num_of_new_items);
						Reset(pstart_page, nstart_pos);
					}
					else
					{
						//no recursion: advance the output buffer once per item of the internal node,
						//as the recursive call would have done at the end of each of its iterations
						for(i=0;i<num_of_new_items;i++)
							gotree_bufmanager.MoveNext();
					}

					gotree_bufmanager.WriteInternalNode();
				}
				if(nlen>0)
				{
					//close the common prefix node and remove its items from the prefix again
					gotree_bufmanager.WriteCommonPrefixNode(1);
					gnprefix_len -= nlen;
				}
			}
		}

		//undo everything done for item k at the top of the loop
		gndepth--;
		gnprefix_len--;
		gotree_bufmanager.MoveNext();
		if(!gpAttrValues[pheader_table[k].nitem].bis_context_item)
		{
			if(gpAttrValues[pheader_table[k].nitem].bis_comparing_item)
				gnnon_context_comp_item_num--;
			else
				gnother_item_num--;
		}
	}

	DelHeaderTable(pnewheader_table, pnew_tgt_stat_array, num_of_items);
}


//==================================================================================================

//----------- for rule/hypotheses exploration ----------------
//Write the support of every item to the text file "<szoutput_name>.itemsup",
//one value per line, in the order of gpAttrValues[0..gnum_of_items-1].
//On failure (file name too long or file cannot be opened) an error is printed and nothing is written.
void OutputItemSup(char* szoutput_name)
{
	FILE *fp;
	char szoutput_filename[200];
	int i, nname_len;

	//snprintf never writes past the buffer; a result >= the buffer size means the name
	//did not fit, and a truncated name must not be opened in its place
	nname_len = snprintf(szoutput_filename, sizeof(szoutput_filename), "%s.itemsup", szoutput_name);
	if(nname_len<0 || nname_len>=(int)sizeof(szoutput_filename))
	{
		printf("Error: output file name %s.itemsup is too long\n", szoutput_name);
		return;
	}

	fp = fopen(szoutput_filename, "wt");
	if(fp==NULL)
	{
		printf("Error: cannot open file %s for write\n", szoutput_filename);
		return;
	}

	for(i=0;i<gnum_of_items;i++)
		fprintf(fp, "%d\n", gpAttrValues[i].nsup);

	fclose(fp);
}

//Helper of FPtree::OpenTdbFiles: builds "<szoutput_name><szsuffix>" and opens it for binary writing.
//Returns the open stream, or NULL after printing an error when the name does not fit into the
//buffer or the file cannot be opened.
static FILE* OpenTdbOutputFile(const char* szoutput_name, const char* szsuffix)
{
	char szfilename[200];
	int nname_len;
	FILE *fp;

	//snprintf never writes past the buffer; a result >= the buffer size means truncation
	nname_len = snprintf(szfilename, sizeof(szfilename), "%s%s", szoutput_name, szsuffix);
	if(nname_len<0 || nname_len>=(int)sizeof(szfilename))
	{
		printf("Error: output file name %s%s is too long\n", szoutput_name, szsuffix);
		return NULL;
	}

	fp = fopen(szfilename, "wb");
	if(fp==NULL)
		printf("Error: cannot open file %s for write\n", szfilename);
	return fp;
}

//Open the three binary output files of the transaction database:
//  <szoutput_name>.tdb       -> mfp_tdb
//  <szoutput_name>.tdb.dir   -> mfp_tdb_dir (gndb_size is written as its first int)
//  <szoutput_name>.tgtvalues -> mfp_tgt
//and reset the write position and transaction counter.
//The operation is all-or-nothing: if any file cannot be opened, the files opened so far are
//closed again and all three handles are left NULL.
void FPtree::OpenTdbFiles(char* szoutput_name)
{
	//start from a defined state so that a failure below never leaves stale handles or counters
	mfp_tdb = NULL;
	mfp_tdb_dir = NULL;
	mfp_tgt = NULL;
	mfp_tdb_pos = 0;
	mntrans_num = 0;

	mfp_tdb = OpenTdbOutputFile(szoutput_name, ".tdb");
	if(mfp_tdb==NULL)
		return;

	mfp_tdb_dir = OpenTdbOutputFile(szoutput_name, ".tdb.dir");
	if(mfp_tdb_dir==NULL)
	{
		fclose(mfp_tdb);
		mfp_tdb = NULL;
		return;
	}
	//the directory file starts with the number of transactions
	fwrite(&gndb_size, sizeof(int), 1, mfp_tdb_dir);

	mfp_tgt = OpenTdbOutputFile(szoutput_name, ".tgtvalues");
	if(mfp_tgt==NULL)
	{
		fclose(mfp_tdb_dir);
		mfp_tdb_dir = NULL;
		fclose(mfp_tdb);
		mfp_tdb = NULL;
		return;
	}
}

//Write one target value to fp in binary form: sizeof(double) bytes when the target attribute is
//continuous (CONTINUOUS or CONTINUOUS_NORMAL), sizeof(int) bytes otherwise.
//ptgt_value must point to at least that many readable bytes.
void FPtree::OutputTgtValue(FILE *fp, void *ptgt_value)
{
	const bool bcontinuous_target = (gntgt_attr_type==CONTINUOUS || gntgt_attr_type==CONTINUOUS_NORMAL);
	const size_t nvalue_size = bcontinuous_target ? sizeof(double) : sizeof(int);

	fwrite(ptgt_value, nvalue_size, 1, fp);
}


//Append one transaction to the output files opened by OpenTdbFiles.
//  ptrans     - the items of the transaction; sorted in place with comp_int_asc before being written
//  nlen       - number of items in ptrans
//  ptgt_value - the transaction's target value, written to mfp_tgt by OutputTgtValue
//Note from the original author: if the target value is specified, then the target value is
//mapped to 1 and other values are mapped to 0 (that mapping is not done in this function).
void FPtree::OutputOneTrans(int *ptrans, int nlen, void* ptgt_value)
{
	++mntrans_num;

	qsort(ptrans, nlen, sizeof(int), comp_int_asc);

	//record in the .tdb file: the item count followed by the items themselves
	fwrite(&nlen, sizeof(int), 1, mfp_tdb);
	if(nlen>0)
		fwrite(ptrans, sizeof(int), nlen, mfp_tdb);

	//the directory file receives the running byte position just behind this record
	mfp_tdb_pos += (static_cast<size_t>(nlen)+1)*sizeof(int);
	fwrite(&mfp_tdb_pos, sizeof(int), 1, mfp_tdb_dir);

	//target values are kept in their own file, one per transaction
	OutputTgtValue(mfp_tgt, ptgt_value);
}

//Finish and close the transaction database output files.
//The directory file starts with the number of transactions; when the number actually written
//(mntrans_num) differs from gndb_size, that first int is overwritten with mntrans_num.
//A handle that is NULL (its fopen in OpenTdbFiles failed) is skipped, because passing NULL to
//fseek/fwrite/fclose is undefined behaviour. Closed handles are reset to NULL so that a second
//call does nothing.
void FPtree::CloseTdbFiles()
{
	if(mfp_tdb_dir!=NULL && mntrans_num!=gndb_size)
	{
		fseek(mfp_tdb_dir, 0, SEEK_SET);
		fwrite(&mntrans_num, sizeof(int), 1, mfp_tdb_dir);
	}

	if(mfp_tdb!=NULL)
	{
		fclose(mfp_tdb);
		mfp_tdb = NULL;
	}
	if(mfp_tdb_dir!=NULL)
	{
		fclose(mfp_tdb_dir);
		mfp_tdb_dir = NULL;
	}
	if(mfp_tgt!=NULL)
	{
		fclose(mfp_tgt);
		mfp_tgt = NULL;
	}
}

