#include <string>
#include <vector>
#include <map>
using namespace std;

#include "global.h"
#include "cfptree_ehta.h"

//extern char prefix[]; //for GUI

// Reads the parameter file `szpara_filename` and initializes the global mining settings.
//
// Every setting is first reset to its default, then the file is parsed line by line:
//   - a line starting with '#' is a comment and is skipped;
//   - any other line has the form `name=value`;
//   - `context_attributes` and `comparing_attributes` take a comma-separated list,
//     every other parameter takes a single value running to the end of the line;
//   - unrecognized parameter names are ignored.
// Names and values longer than the local buffers are truncated.
//
// Returns 0 on success, -1 if the file cannot be opened or a mandatory parameter
// (data_file, names_file, output, target_attribute) is missing. On success the
// effective settings are printed to stdout.
//
// NOTE(review): the two attribute maps are allocated with `new` on every call and are
// not released here, so calling this function twice leaks the previous maps.
// NOTE(review): the strcpy calls into the gsz* globals rely on those buffers holding
// at least 100 bytes; their declarations are not visible here -- confirm.
int GetParameters(char* szpara_filename)
{
	FILE *fp;
	char ch, szpara_name[100], szvalue[100];
	int nlen;
	// Longest name/value that fits in the local buffers, leaving room for the terminator.
	const int nmax_name_len = (int)sizeof(szpara_name)-1;
	const int nmax_value_len = (int)sizeof(szvalue)-1;
	map<string, int>::iterator map_it;

	// ---- reset every setting to its default ----
	gszdata_filename[0] = 0;
	gsznames_filename[0] = 0;
	gszoutput_filename[0] = 0;
	//sprintf(gszoutput_filename, "%s", prefix); // for GUI
	gsztarget_attr[0] = 0;
	gsztarget_value[0] = 0;
	gpcontext_attr_map = new map<string, int>;
	gpcomparing_attr_map = new map<string, int>;

	gnmin_sup = 100;
	gdmin_sup = 0.1; 
	gdmax_pvalue = 0.05;
	gdmin_diff = 0;
	gnmax_len = 5;

	gnsignal_type = IS_HYPOTHESIS;
	gnmaterialization_mode = FREQ_ONLY; 
	//gnmaterialization_mode = FREQ_N_INFREQ_BORDER;
	gncompare_mode = PAIR_WISE; 
	gneffect_size_method = CONFIDENCE;
	gntest_statisitic_method = FISHER_EXACT_TEST;
	gnPVALUE_BUF_SIZE = (1<<23);
	gnoutput_mode = OUTPUT_CLOSED_N_SINGLETON;

	gdmax_local_pvalue = 0.05;
	gncorrection_method = -1;
	gnum_of_repetitions = 1000;
	gnseeding_method = SEEDING_GLOBAL;
	gndata_perm_method = DATA_PERM_SWAP;
	gbdiff_tid_list = true;
	gbgen_tid_list = false;

	fp = fopen(szpara_filename, "rt");
	if(fp==NULL)
	{
		printf("Error: cannot open file %s for read\n", szpara_filename);
		return -1;
	}

	// ---- parse the file; `ch` always holds the next unprocessed character ----
	ch = fgetc(fp);
	while(!feof(fp))
	{
		if(ch=='#')
		{
			// comment line: skip to the end of the line
			while(!feof(fp) && ch!='\n')
				ch = fgetc(fp);
		}
		else
		{
			// parameter name: everything up to '=' (or the end of the line)
			nlen = 0;
			while(!feof(fp) && ch!='=' && ch!='\n')
			{
				if(nlen<nmax_name_len)
					szpara_name[nlen++] = ch;
				ch = fgetc(fp);
			}
			szpara_name[nlen] = 0;
			if(ch=='=')
				ch = fgetc(fp);

			if(strcmp(szpara_name, "context_attributes")==0)
			{
				// comma-separated list of attribute names; empty entries are dropped
				while(!feof(fp) && ch!='\n')
				{
					nlen = 0;
					while(!feof(fp) && ch!=',' && ch!='\n')
					{
						if(nlen<nmax_value_len)
							szvalue[nlen++] = ch;
						ch = fgetc(fp);
					}
					szvalue[nlen] = 0;
					if(nlen>0)
						(*gpcontext_attr_map)[szvalue] = 1;
					if(ch!='\n')
						ch = fgetc(fp);
				}
			}
			else if(strcmp(szpara_name, "comparing_attributes")==0)
			{
				// same list format as context_attributes
				while(!feof(fp) && ch!='\n')
				{
					nlen = 0;
					while(!feof(fp) && ch!=',' && ch!='\n')
					{
						if(nlen<nmax_value_len)
							szvalue[nlen++] = ch;
						ch = fgetc(fp);
					}
					szvalue[nlen] = 0;
					if(nlen>0)
						(*gpcomparing_attr_map)[szvalue] = 1;
					if(ch!='\n')
						ch = fgetc(fp);
				}
			}
			else
			{
				// single value: everything up to the end of the line ('\r' also ends it)
				nlen = 0;
				while(!feof(fp) && ch!='\n' && ch!='\r')
				{
					if(nlen<nmax_value_len)
						szvalue[nlen++] = ch;
					ch = fgetc(fp);
				}
				szvalue[nlen] = 0;

				if(strcmp(szpara_name, "data_file")==0)
					strcpy(gszdata_filename, szvalue);
				else if(strcmp(szpara_name, "names_file")==0)
					strcpy(gsznames_filename, szvalue);
				else if(strcmp(szpara_name, "output")==0)
					strcpy(gszoutput_filename, szvalue);
				else if(strcmp(szpara_name, "min_sup")==0)
				{
					//gnmin_sup = atoi(szvalue);
					// A value >=1 is also stored (truncated) in gnmin_sup; a value in (0,1)
					// is kept in gdmin_sup only -- presumably a count vs. a fraction.
					gdmin_sup = atof(szvalue);
					if(gdmin_sup>=1)
						gnmin_sup = (int)gdmin_sup;
					else if(gdmin_sup<=0)
					{
						// The parsed value is what must be validated here; gnmin_sup still
						// holds its positive default at this point.
						gdmin_sup = 0.1;
						printf("Please specify a positive number as the minimum support threshold. Use default threshold 0.1.\n");
					}
				}
				else if(strcmp(szpara_name, "max_pvalue")==0)
				{
					gdmax_pvalue = atof(szvalue);
					if(gdmax_pvalue>1 || gdmax_pvalue<0)
					{
						gdmax_pvalue = 0.05;
						printf("Please specify a number between 0 and 1 as the maximum p-value threshold. Use default threshold 0.05.\n");
					}
				}
				else if(strcmp(szpara_name, "min_effect_size")==0)
				{
					gdmin_diff = atof(szvalue);
					if(gdmin_diff<0)
					{
						gdmin_diff = -gdmin_diff;
						printf("Please specify a positive value as the minimum difference threshold. Use the reverse value %.3f.\n", gdmin_diff);
					}
				}
				else if(strcmp(szpara_name, "max_len")==0)
				{
					gnmax_len = atoi(szvalue);
					if(gnmax_len<=0)
					{
						// NOTE(review): this fallback (20) differs from the initial default (5).
						gnmax_len = 20;
						printf("Please specify a positive value as the maximum length threshold. Use default value 20.\n");
					}
				}
				else if(strcmp(szpara_name, "target_attribute")==0)
					strcpy(gsztarget_attr, szvalue);
				else if(strcmp(szpara_name, "target_value")==0)
					strcpy(gsztarget_value, szvalue);
				else if(strcmp(szpara_name, "test-statistic")==0)
				{
					strlwr(szvalue);
					if(strcmp(szvalue, "x2")==0)
						gntest_statisitic_method = X2_TEST;
					else if(strcmp(szvalue, "fisher's exact test")==0)
						gntest_statisitic_method = FISHER_EXACT_TEST;
					else 
						gntest_statisitic_method = -1;
				}
				else if(strcmp(szpara_name, "materialization_mode")==0)
				{
					strlwr(szvalue);
					if(strcmp(szvalue, "freq only")==0)
						gnmaterialization_mode = FREQ_ONLY;
					else if(strcmp(szvalue, "freq + infreq border")==0)
						gnmaterialization_mode = FREQ_N_INFREQ_BORDER;
				}
				else if(strcmp(szpara_name, "compare_mode")==0)
				{
					strlwr(szvalue);
					if(strcmp(szvalue, "pairwise")==0)
						gncompare_mode = PAIR_WISE;
					else if(strcmp(szvalue, "attribute-wise")==0)
						gncompare_mode = ATTRIBUTE_WISE;
				}
				else if(strcmp(szpara_name, "output_mode")==0)
				{
					strlwr(szvalue);
					if(strcmp(szvalue, "all")==0)
						gnoutput_mode = OUTPUT_CLOSED;
					else //if(strcmp(szvalue, "representative")==0)
						gnoutput_mode = OUTPUT_REPRESENTATIVE;
				}
				else if(strcmp(szpara_name, "signal_type")==0)
				{
					strlwr(szvalue);
					if(strcmp(szvalue, "rule")==0)
						gnsignal_type = IS_RULE;
					else 
						gnsignal_type = IS_HYPOTHESIS;
				}
				else if(strcmp(szpara_name, "effect_size_method")==0)
				{
					strlwr(szvalue);
					if(strcmp(szvalue, "odds_ratio")==0)
						gneffect_size_method = ODDS_RATIO;
					else 
						gneffect_size_method = CONFIDENCE;
				}
				else if(strcmp(szpara_name, "max_local_pvalue")==0)
				{
					gdmax_local_pvalue = atof(szvalue);
					if(gdmax_local_pvalue>1 || gdmax_local_pvalue<0)
					{
						gdmax_local_pvalue = 0.05;
						printf("Please specify a number between 0 and 1 as the maximum local p-value threshold. Use default threshold 0.05.\n");
					}
				}
				else if(strcmp(szpara_name, "correction_method")==0)
				{
					// an unrecognized name leaves gncorrection_method at its current value
					strlwr(szvalue);
					if(strcmp(szvalue, "permutation")==0)
						gncorrection_method = PERMUTATION;
					else if(strcmp(szvalue, "permutation_superset")==0)
						gncorrection_method = PERMUTATION_SUPERSET;
					else if(strcmp(szvalue, "simulated_perm")==0)
						gncorrection_method = SIMULATED_PERM;
					else if(strcmp(szvalue, "simulated_perm_subset")==0)
						gncorrection_method = SIMULATED_PERM_SUBSET;
					else if(strcmp(szvalue, "simulated_perm_subset_sib")==0)
						gncorrection_method = SIMULATED_PERM_SUBSET_SIB;
					else if(strcmp(szvalue, "simulated_perm_subset_left_sib")==0)
						gncorrection_method = SIMULATED_PERM_SUBSET_LEFT_SIB;
					else if(strcmp(szvalue, "simulated_perm_subset_allsib")==0)
						gncorrection_method = SIMULATED_PERM_SUBSET_ALLSIB;
					else if(strcmp(szvalue, "simulated_perm_superset")==0)
						gncorrection_method = SIMULATED_PERM_SUPERSET;
					else if(strcmp(szvalue, "simulated_perm_hybrid")==0)
						gncorrection_method = SIMULATED_PERM_HYBRID;
					else if(strcmp(szvalue, "simulated_perm_subsets")==0)
						gncorrection_method = SIMULATED_PERM_SUBSETS;
				}
				else if(strcmp(szpara_name, "num_of_repetitions")==0)
				{
					gnum_of_repetitions = atoi(szvalue);
					if(gnum_of_repetitions<=0)
					{
						gnum_of_repetitions = 1000;
						printf("Please specify a positive integer as the number of repetitions. Use default value 1000.\n");
					}
				}
				else if(strcmp(szpara_name, "seeding_method")==0)
				{
					strlwr(szvalue);
					if(strcmp(szvalue, "global")==0)
						gnseeding_method = SEEDING_GLOBAL;
					else if(strcmp(szvalue, "per rule")==0)
						gnseeding_method = SEEDING_PER_RULE;
					else if(strcmp(szvalue, "per perm")==0)
						gnseeding_method = SEEDING_PER_PERM;
				}				
				else if(strcmp(szpara_name, "data_perm_method")==0)
				{
					strlwr(szvalue);
					if(strcmp(szvalue, "rand tid")==0)
						gndata_perm_method = DATA_PERM_RAND_TID;
					else if(strcmp(szvalue, "rand tid class")==0)
						gndata_perm_method = DATA_PERM_RAND_TID_CLASS;
					else if(strcmp(szvalue, "swap")==0)
						gndata_perm_method = DATA_PERM_SWAP;
				}
				else if(strcmp(szpara_name, "pvalue_buf_size")==0)
				{
					// the value is multiplied by 2^20 before being stored
					gnPVALUE_BUF_SIZE = (int)(atof(szvalue)*(1<<20));
					if(gnPVALUE_BUF_SIZE<0)
						gnPVALUE_BUF_SIZE = 0;
				}
				else if(strcmp(szpara_name, "perm_diff_list")==0)
				{
					strlwr(szvalue);
					if(strcmp(szvalue, "no")==0)
						gbdiff_tid_list = false;
					else if(strcmp(szvalue, "yes")==0)
						gbdiff_tid_list = true;
				}
				else if(strcmp(szpara_name, "gen_tid_list")==0)
				{
					strlwr(szvalue);
					if(strcmp(szvalue, "no")==0)
						gbgen_tid_list = false;
					else if(strcmp(szvalue, "yes")==0)
						gbgen_tid_list = true;
				}
			}
		}

		// step over the line terminator, if that is where parsing stopped
		if(ch=='\n')
			ch = fgetc(fp);
	}
	fclose(fp);

	// ---- reconcile settings that depend on each other ----
	// Any correction method forces frequent-only materialization and Fisher's exact test.
	if(gncorrection_method>=0)
	{
		gnmaterialization_mode = FREQ_ONLY;
		gntest_statisitic_method = FISHER_EXACT_TEST;
	}

	// The infrequent-border mode is kept only for pairwise hypotheses with a target value.
	if(!(gnsignal_type==IS_HYPOTHESIS && gncompare_mode==PAIR_WISE && gsztarget_value[0]!=0) && gnmaterialization_mode!=FREQ_ONLY)
		gnmaterialization_mode = FREQ_ONLY;

	// ---- validate mandatory settings ----
	if(gdmin_diff<0)
	{
		printf("Please specify a postive value as the minimum difference threshold.\n");
		return -1;
	}
	if(gszdata_filename[0]==0)
	{
		printf("Please specify the data filename\n");
		return -1;
	}
	if(gsznames_filename[0]==0)
	{
		printf("Please specify the names filename\n");
		return -1;
	}
	if(gszoutput_filename[0]==0)
	{
		printf("Please specify the output filename\n");
		return -1;
	}
	if(gsztarget_attr[0]==0)
	{
		printf("Please specify the target attribute\n");
		return -1;
	}

	// ---- echo the effective settings ----
	printf("Parameter settings:\n");
	printf("-------------------\n");
	printf("  data_file=%s\n", gszdata_filename);
	printf("  names_file=%s\n", gsznames_filename);
	//printf("  min_sup=%d\n", gnmin_sup);
	printf("  max_pvalue=%.2E\n", gdmax_pvalue);
	printf("  min_effect_size=%.2E\n", gdmin_diff);
	printf("  max_len=%d\n", gnmax_len);
	printf("  target_attribute=%s\n", gsztarget_attr);
	printf("  target_value=%s\n", gsztarget_value);

	printf("  context-attributes=");
	if(!gpcontext_attr_map->empty())
	{
		map_it=gpcontext_attr_map->begin();
		printf("%s", map_it->first.c_str());
		map_it++;
		while(map_it!=gpcontext_attr_map->end())
		{
			printf(",%s", map_it->first.c_str());
			map_it++;
		}
	}
	printf("\n");

	printf("  comparing-attributes=");
	if(!gpcomparing_attr_map->empty())
	{
		map_it=gpcomparing_attr_map->begin();
		printf("%s", map_it->first.c_str());
		map_it++;
		while(map_it!=gpcomparing_attr_map->end())
		{
			printf(",%s", map_it->first.c_str());
			map_it++;
		}
	}
	printf("\n");

	if(gntest_statisitic_method>=0)
	{
		printf("  test-statistic=");
		if(gntest_statisitic_method==X2_TEST)
			printf("X2\n");
		else if(gntest_statisitic_method==FISHER_EXACT_TEST)
			printf("fisher's exact test\n");
	}
	printf("  materialization_mode=");
	if(gnmaterialization_mode==FREQ_ONLY)
		printf("freq only\n");
	else 
		printf("freq + infreq border\n");		
	
	printf("  compare_mode=");
	if(gncompare_mode==PAIR_WISE)
		printf("pairwise\n");
	else 
		printf("attribute-wise\n");

	printf("  output_mode=");
	if(gnoutput_mode==OUTPUT_CLOSED)
		printf("CLOSED\n");
	else 
		printf("representative\n");
	
	printf("  output=%s\n", gszoutput_filename);
	printf("\n");

	return 0;
}


// Reads `sztgtvalue_filename` line by line and appends every line (empty ones
// included) to *gsztgt_values. Lines longer than the local buffer are truncated.
// Returns silently when the file cannot be opened.
void LoadTgtValues(char* sztgtvalue_filename)
{
	FILE *fp;
	char ch, sztgtvalue[100];
	int nlen;
	// Longest line that fits in sztgtvalue, leaving room for the terminator.
	const int nmax_value_len = (int)sizeof(sztgtvalue)-1;

	fp = fopen(sztgtvalue_filename, "rt");
	if(fp==NULL)
	{
		//printf("Error: cannot open file %s for read\n", sztgtvalue_filename);
		return;
	}

	ch = fgetc(fp);
	while(!feof(fp))
	{
		nlen = 0;
		while(!feof(fp) && ch!='\n')
		{
			// characters that do not fit are consumed but dropped
			if(nlen<nmax_value_len)
				sztgtvalue[nlen++] = ch;
			ch = fgetc(fp);
		}
		sztgtvalue[nlen] = 0;

		gsztgt_values->push_back(sztgtvalue);

		// step over the '\n' (or hit end-of-file, which ends the outer loop)
		ch = fgetc(fp);
	}
	fclose(fp);
}

// Reads the per-transaction target values from the binary file
// "<szoutput_name>.tgtvalues" into the caller-supplied buffer `ptgt_values`.
//
// gndb_size elements are read; each element is a double when the target attribute
// is CONTINUOUS or CONTINUOUS_NORMAL and an int otherwise. The caller must provide
// a buffer large enough for that. Errors are reported on stdout.
void LoadTransTgtValues(char* szoutput_name, void *ptgt_values)
{
	FILE *fp;
	char sztgt_filename[200];
	size_t nelem_size, num_read;

	// Reject names that would not fit in sztgt_filename together with the extension.
	if(strlen(szoutput_name)+strlen(".tgtvalues")>=sizeof(sztgt_filename))
	{
		printf("Error: file name %s.tgtvalues is too long\n", szoutput_name);
		return;
	}

	sprintf(sztgt_filename, "%s.tgtvalues", szoutput_name);
	fp = fopen(sztgt_filename, "rb");
	if(fp==NULL)
	{
		printf("Error: cannot open file %s for read\n", sztgt_filename);
		return;
	}

	if(gntgt_attr_type==CONTINUOUS || gntgt_attr_type==CONTINUOUS_NORMAL)
		nelem_size = sizeof(double);
	else
		nelem_size = sizeof(int);

	num_read = fread(ptgt_values, nelem_size, gndb_size, fp);
	if(num_read!=(size_t)gndb_size)
		printf("Error: file %s contains fewer target values than expected\n", sztgt_filename);

	fclose(fp);
}

// Reads the per-transaction target values from the binary file
// "<szoutput_name>.tgtvalues" into a freshly allocated global array.
//
// For a CONTINUOUS or CONTINUOUS_NORMAL target the values are doubles stored in
// gptgt_values; otherwise they are ints stored in gpint_tgt_values. gndb_size
// elements are read. Errors are reported on stdout; when the file cannot be
// opened no array is allocated.
void LoadTransTgtValues(char* szoutput_name)
{
	FILE *fp;
	char sztgt_filename[200];
	size_t num_read;

	// Reject names that would not fit in sztgt_filename together with the extension.
	if(strlen(szoutput_name)+strlen(".tgtvalues")>=sizeof(sztgt_filename))
	{
		printf("Error: file name %s.tgtvalues is too long\n", szoutput_name);
		return;
	}

	sprintf(sztgt_filename, "%s.tgtvalues", szoutput_name);
	fp = fopen(sztgt_filename, "rb");
	if(fp==NULL)
	{
		printf("Error: cannot open file %s for read\n", sztgt_filename);
		return;
	}

	if(gntgt_attr_type==CONTINUOUS || gntgt_attr_type==CONTINUOUS_NORMAL)
	{
		gptgt_values = NewDoubleArray(gndb_size);
		num_read = fread(gptgt_values, sizeof(double), gndb_size, fp);
	}
	else
	{
		gpint_tgt_values = NewIntArray(gndb_size);
		num_read = fread(gpint_tgt_values, sizeof(int), gndb_size, fp);
	}
	if(num_read!=(size_t)gndb_size)
		printf("Error: file %s contains fewer target values than expected\n", sztgt_filename);

	fclose(fp);
}

// Releases the global target-value array that matches the target attribute type
// (gptgt_values for CONTINUOUS / CONTINUOUS_NORMAL, gpint_tgt_values otherwise)
// and resets the corresponding pointer to NULL.
void DelTransTgtValues()
{
	const bool bcontinuous_target = (gntgt_attr_type==CONTINUOUS || gntgt_attr_type==CONTINUOUS_NORMAL);

	if(!bcontinuous_target)
	{
		// integer-valued target
		DelIntArray(gpint_tgt_values, gndb_size);
		gpint_tgt_values = NULL;
		return;
	}

	// double-valued target
	DelDoubleArray(gptgt_values, gndb_size);
	gptgt_values = NULL;
}


// Loads the attribute/value dictionary from `szattrvalue_filename` into the global
// tables gpAttributes / gpAttrValues and their string buffers.
//
// Each line of the file has the form `<attr_name>=<value>`. Consecutive lines with
// the same attribute name belong to one attribute; the whole line (name, '=' and
// value) is stored as the item's szattr_value. Lines longer than the local buffer
// are truncated.
//
// The file is scanned twice: the first pass counts attributes, items and the string
// buffer sizes; the second pass fills the tables. Afterwards several consistency
// checks are run (failures are only printed), the context/comparing flags are set
// from gpcontext_attr_map / gpcomparing_attr_map (an empty map selects every
// attribute), and the attribute types are loaded through LoadAttrType().
//
// When the file cannot be opened the counters are left at zero and no table is
// allocated.
void LoadAttrNValues(char* szattrvalue_filename)
{
	FILE *fp;
	char ch, szprev_attr_name[100], szattr_name[100];
	int nlen, num_of_attrs, num_of_items, nattr_buf_pos, nvalue_buf_pos;
	int i, j, nattr_no, norder = 0;
	// Longest line that fits in szattr_name, leaving room for the terminator. Both
	// passes apply the same limit so that the computed buffer sizes stay consistent.
	const int nmax_line_len = (int)sizeof(szattr_name)-1;
	map<string, int>::iterator map_it;

	gnum_of_attrs = 0;
	gnattr_name_buf_size = 0;
	gnattr_value_buf_size = 0;
	gnum_of_items = 0;

	fp = fopen(szattrvalue_filename, "rt");
	if(fp==NULL)
	{
		printf("Error: cannot open file %s for read\n", szattrvalue_filename);
		return;
	}

	// ---- pass 1: count attributes/items and measure the string buffers ----
	szprev_attr_name[0] = 0;
	ch = fgetc(fp);
	while(!feof(fp))
	{
		nlen = 0;
		while(!feof(fp) && ch!='=' && ch!='\n')
		{
			if(nlen<nmax_line_len)
				szattr_name[nlen++] = ch;
			ch = fgetc(fp);
		}
		szattr_name[nlen] = 0;
		// The very first record always opens an attribute, even if its name is empty;
		// otherwise the second pass would index the attribute table at -1.
		if(gnum_of_attrs==0 || strcmp(szprev_attr_name, szattr_name))
		{
			gnum_of_attrs++;
			gnattr_name_buf_size += nlen+1;
			strcpy(szprev_attr_name, szattr_name);
		}

		// the rest of the line only contributes to the length of the item string
		while(!feof(fp) && ch!='\n')
		{
			if(nlen<nmax_line_len)
				nlen++;
			ch = fgetc(fp);
		}
		gnum_of_items++;
		gnattr_value_buf_size += nlen+1;

		if(ch=='\n')
			ch = fgetc(fp);
	}
	rewind(fp);

	gpAttributes = new ATTRIBUTE[gnum_of_attrs];
	IncMemSize(sizeof(ATTRIBUTE)*gnum_of_attrs);
	gszattr_name_buf = new char[gnattr_name_buf_size];
	IncMemSize(sizeof(char)*gnattr_name_buf_size);
	nattr_buf_pos = 0;

	gpAttrValues = new ATTR_VALUE[gnum_of_items];
	IncMemSize(sizeof(ATTR_VALUE)*gnum_of_items);
	gszattr_value_buf = new char[gnattr_value_buf_size];
	IncMemSize(sizeof(char)*gnattr_value_buf_size);
	nvalue_buf_pos = 0;

	num_of_attrs = 0;
	num_of_items = 0;

	// ---- pass 2: fill the attribute and item tables ----
	szprev_attr_name[0] = 0;
	ch = fgetc(fp);
	while(!feof(fp))
	{
		nlen = 0;
		while(!feof(fp) && ch!='=' && ch!='\n')
		{
			if(nlen<nmax_line_len)
				szattr_name[nlen++] = ch;
			ch = fgetc(fp);
		}
		szattr_name[nlen] = 0;
		if(num_of_attrs==0 || strcmp(szprev_attr_name, szattr_name))
		{
			gpAttributes[num_of_attrs].szattr_name = &gszattr_name_buf[nattr_buf_pos];
			nattr_buf_pos += nlen+1;
			strcpy(gpAttributes[num_of_attrs].szattr_name, szattr_name);
			gpAttributes[num_of_attrs].num_of_values = 0;
			gpAttributes[num_of_attrs].nstart_item_id = num_of_items;
			//gpAttributes[num_of_attrs].nattr_no = num_of_attrs;
			num_of_attrs++;
			norder = 0;

			strcpy(szprev_attr_name, szattr_name);
		}
		gpAttributes[num_of_attrs-1].num_of_values++;

		// keep appending to szattr_name so that it ends up holding the whole line
		while(!feof(fp) && ch!='\n')
		{
			if(nlen<nmax_line_len)
				szattr_name[nlen++] = ch;
			ch = fgetc(fp);
		}
		szattr_name[nlen] = 0;
		gpAttrValues[num_of_items].szattr_value = &gszattr_value_buf[nvalue_buf_pos];
		nvalue_buf_pos += nlen+1;
		strcpy(gpAttrValues[num_of_items].szattr_value, szattr_name);
		gpAttrValues[num_of_items].nattr_no = num_of_attrs-1;
		//here the norder field refers to the order of the item among all the values of the attribute
		gpAttrValues[num_of_items].norder = norder;
		num_of_items++;
		norder++;

		if(ch=='\n')
			ch = fgetc(fp);
	}
	fclose(fp);

	gnmax_item_num_per_attr = 0;
	for(i=0;i<gnum_of_attrs;i++)
	{
		if(gnmax_item_num_per_attr<gpAttributes[i].num_of_values)
			gnmax_item_num_per_attr = gpAttributes[i].num_of_values;
	}

	// ---- consistency checks between the two passes (diagnostics only) ----
	if(num_of_attrs!=gnum_of_attrs)
		printf("Error: inconsistent number of attributes\n");
	if(num_of_items!=gnum_of_items)
		printf("Error: inconsistent number of items\n");
	if(nattr_buf_pos!=gnattr_name_buf_size)
		printf("Error: inconsistent buffer size of attribute name buffer\n");
	if(nvalue_buf_pos!=gnattr_value_buf_size)
		printf("Error: inconsistent buffer size of attribute value buffer\n");

	for(i=0;i<gnum_of_items;i++)
	{
		nattr_no = gpAttrValues[i].nattr_no;
		if(!strstr(gpAttrValues[i].szattr_value, gpAttributes[nattr_no].szattr_name))
			printf("Error: the attribute name should be a sub-string of the attirbute value\n");
	}
	for(i=0;i<gnum_of_attrs-1;i++)
	{
		if(gpAttributes[i].nstart_item_id+gpAttributes[i].num_of_values!=gpAttributes[i+1].nstart_item_id)
			printf("Error: inconsistent number of values\n");
	}
	// i now indexes the last attribute; skip the check when the table is empty
	if(gnum_of_attrs>0 && gpAttributes[i].nstart_item_id+gpAttributes[i].num_of_values!=gnum_of_items)
		printf("Error: inconsistent number of values\n");

	// ---- mark context / comparing attributes and propagate the flags to items ----
	gnum_of_cmp_attrs = 0;

	for(i=0;i<gnum_of_attrs;i++)
	{
		if(!gpcontext_attr_map->empty())
		{
			map_it = gpcontext_attr_map->find(gpAttributes[i].szattr_name);
			if(map_it!=gpcontext_attr_map->end())
				gpAttributes[i].bis_context_attr = true;
			else
				gpAttributes[i].bis_context_attr = false;
		}
		else
			gpAttributes[i].bis_context_attr = true;

		if(!gpcomparing_attr_map->empty())
		{
			map_it = gpcomparing_attr_map->find(gpAttributes[i].szattr_name);
			if(map_it!=gpcomparing_attr_map->end())
				gpAttributes[i].bis_comparing_attr = true;
			else
				gpAttributes[i].bis_comparing_attr = false;
		}
		else
			gpAttributes[i].bis_comparing_attr = true;

		for(j=gpAttributes[i].nstart_item_id;j<gpAttributes[i].nstart_item_id+gpAttributes[i].num_of_values;j++)
		{
			if(gpAttrValues[j].norder!=j-gpAttributes[i].nstart_item_id)
				printf("Error: inconsistent norder field\n");
			gpAttrValues[j].bis_comparing_item = gpAttributes[i].bis_comparing_attr;
			gpAttrValues[j].bis_context_item = gpAttributes[i].bis_context_attr;
		}

		// comparing attributes are numbered consecutively; all others get -1
		if(gpAttributes[i].bis_comparing_attr)
		{
			gpAttributes[i].norder = gnum_of_cmp_attrs;
			gnum_of_cmp_attrs++;
		}
		else
			gpAttributes[i].norder = -1;
	}

	LoadAttrType(gsznames_filename);
}

// Writes the name of every attribute in gpAttributes to the text file
// `szoutput_filename`, one name per line. Prints an error and returns when the
// file cannot be opened.
void OutputAttrNames(char* szoutput_filename)
{
	FILE *fpnames = fopen(szoutput_filename, "wt");
	if(!fpnames)
	{
		printf("Error: cannot open file %s for write\n", szoutput_filename);
		return;
	}

	int nattr = 0;
	while(nattr<gnum_of_attrs)
	{
		fprintf(fpnames, "%s\n", gpAttributes[nattr].szattr_name);
		nattr++;
	}

	fclose(fpnames);
}

// Maps the type keyword at the start of `szvalue` to the matching attribute-type
// constant. Prints an error and returns -1 when no keyword matches.
static int ParseAttrTypeName(const char* szvalue)
{
	// "continuous-normal" must be tested before its prefix "continuous"
	if(strncmp(szvalue, "continuous-normal", 17)==0)
		return CONTINUOUS_NORMAL;
	if(strncmp(szvalue, "continuous", 10)==0)
		return CONTINUOUS;
	if(strncmp(szvalue, "nominal", 7)==0)
		return NOMINAL;
	if(strncmp(szvalue, "ordinal", 7)==0)
		return ORDINAL;

	printf("Error: unknown attribute type: %s\n", szvalue);
	return -1;
}

// Reads the attribute types from `sznames_filename`.
//
// Each line has the form `<name>: <type>`, where the type starts with one of
// "continuous-normal", "continuous", "nominal" or "ordinal". When <name> matches an
// entry of gpAttributes its nattr_type is set; otherwise, when it equals
// gsztarget_attr, gntgt_attr_type is set. Other names are ignored. An unknown type
// is reported and stored as -1. Names and types longer than the local buffers are
// truncated.
void LoadAttrType(char *sznames_filename)
{
	FILE *fp;
	char ch, szname[100], szvalue[100];
	int nlen, i;
	// Longest name/type that fits in the local buffers, leaving room for the terminator.
	const int nmax_name_len = (int)sizeof(szname)-1;
	const int nmax_value_len = (int)sizeof(szvalue)-1;
	map<string, int> attr_map;
	map<string, int>::iterator map_it;

	fp = fopen(sznames_filename, "rt");
	if(fp==NULL)
	{
		printf("Error: cannot open file %s for read\n", sznames_filename);
		return;
	}

	// attribute name -> index into gpAttributes
	for(i=0;i<gnum_of_attrs;i++)
		attr_map[gpAttributes[i].szattr_name] = i;

	ch = fgetc(fp);
	while(!feof(fp))
	{
		// name: everything up to ':'
		nlen = 0;
		while(!feof(fp) && ch!=':' && ch!='\n')
		{
			if(nlen<nmax_name_len)
				szname[nlen++] = ch;
			ch = fgetc(fp);
		}
		szname[nlen] = 0;
		if(ch!=':')
			printf("Error: ':' expected\n");
		else
			ch = fgetc(fp);

		// skip blanks between ':' and the type
		while(!feof(fp) && (ch==' ' || ch=='\t'))
			ch = fgetc(fp);

		// type: the rest of the line
		nlen = 0;
		while(!feof(fp) && ch!='\n')
		{
			if(nlen<nmax_value_len)
				szvalue[nlen++] = ch;
			ch = fgetc(fp);
		}
		szvalue[nlen] = 0;

		map_it = attr_map.find(szname);
		if(map_it!=attr_map.end())
			gpAttributes[map_it->second].nattr_type = ParseAttrTypeName(szvalue);
		else if(strcmp(szname, gsztarget_attr)==0)
			gntgt_attr_type = ParseAttrTypeName(szvalue);

		if(ch=='\n')
			ch = fgetc(fp);
	}
	fclose(fp);
}

// Loads filter conditions from `szfilter_filename` into `pbitmap`.
//
// Each line has the form `<name><TAB><status>`, where the status is "exclude",
// "include" or "optional" (case-insensitive). The name is looked up in
// `pname2id_map`; when the resulting id is below `nmax_id`, pbitmap[id] is set to
// EXCLUDE, INCLUDE or OPTIONAL. An unknown status is logged and treated as
// OPTIONAL. Unknown names and ids >= nmax_id are logged and skipped. Lines with an
// empty name or status are ignored, as is anything after the status on a line.
void LoadFilterConds(char* szfilter_filename, map<string, int> *pname2id_map, char* pbitmap, int nmax_id)
{
	FILE *fp;
	char ch, szname[200], szstatus[100], szerrmsg[300];
	int nlen;
	// Longest name/status that fits in the local buffers, leaving room for the terminator.
	const int nmax_name_len = (int)sizeof(szname)-1;
	const int nmax_status_len = (int)sizeof(szstatus)-1;
	map<string, int>::iterator map_it;

	fp = fopen(szfilter_filename, "rt");
	if(fp==NULL)
	{
		// precision bound keeps a long path from overflowing szerrmsg
		sprintf(szerrmsg, "Error: cannot open file %.200s for read\n", szfilter_filename);
		LogErrMsg("", "LoadFilterConds", szerrmsg);
		return;
	}

	ch = fgetc(fp);
	while(!feof(fp))
	{
		// name: everything up to the first tab (names may contain spaces)
		nlen = 0;
		while(!feof(fp) && ch!='\t' && ch!='\n')
		{
			if(nlen<nmax_name_len)
				szname[nlen++] = ch;
			ch = fgetc(fp);
		}
		szname[nlen] = 0;

		// skip the separator between the two fields
		while(!feof(fp) && (ch=='\t' || ch==' '))
			ch = fgetc(fp);

		// status: a single word, ended by a blank or the end of the line
		nlen = 0;
		while(!feof(fp) && ch!=' ' && ch!='\t' && ch!='\n')
		{
			if(nlen<nmax_status_len)
				szstatus[nlen++] = ch;
			ch = fgetc(fp);
		}
		szstatus[nlen] = 0;

		// discard whatever follows the status on this line
		while(!feof(fp) && ch!='\n')
			ch = fgetc(fp);

		if(szname[0]!=0 && szstatus[0]!=0)
		{
			map_it = pname2id_map->find(szname);
			if(map_it!=pname2id_map->end())
			{
				if(map_it->second<nmax_id)
				{
					strlwr(szstatus);
					if(strcmp(szstatus, "exclude")==0)
						pbitmap[map_it->second] = EXCLUDE;
					else if(strcmp(szstatus, "include")==0)
						pbitmap[map_it->second] = INCLUDE;
					else if(strcmp(szstatus, "optional")==0)
						pbitmap[map_it->second] = OPTIONAL;
					else
					{
						sprintf(szerrmsg, "Error: wrong status %s", szstatus);
						LogErrMsg("", "LoadFilterConds", szerrmsg);
						pbitmap[map_it->second] = OPTIONAL;
					}
				}
				else
				{
					sprintf(szerrmsg, "Error: the id is larger than %d", nmax_id);
					LogErrMsg("", "LoadFilterConds", szerrmsg);
				}
			}
			else
			{
				sprintf(szerrmsg, "Error: cannot find %s", szname);
				LogErrMsg("", "LoadFilterConds", szerrmsg);
			}
		}

		if(ch=='\n')
			ch = fgetc(fp);
	}
	fclose(fp);
}

// Checks that the per-item statuses in `pitem_bitmap` are compatible with the
// per-attribute statuses in `pattr_bitmap` and logs every violation:
//   - EXCLUDE attribute: all of its values must be EXCLUDE;
//   - INCLUDE attribute: at most one value may be INCLUDE; with exactly one INCLUDE
//     value no other value may be left optional; with none, the values must not all
//     be EXCLUDE;
//   - any other attribute status: no value may be INCLUDE, and the values must not
//     all be EXCLUDE.
// Both bitmaps are only read. Item statuses other than EXCLUDE/INCLUDE count as
// optional.
void CheckAttrNValueStatus(char* pattr_bitmap, char* pitem_bitmap)
{
	char szerrmsg[200];
	int i, j, num_of_excludes, num_of_includes, num_of_optionals;

	for(i=0;i<gnum_of_attrs;i++)
	{
		// tally the statuses of this attribute's values
		num_of_excludes = 0;
		num_of_includes = 0;
		num_of_optionals = 0;
		for(j=gpAttributes[i].nstart_item_id;j<gpAttributes[i].nstart_item_id+gpAttributes[i].num_of_values;j++)
		{
			if(pitem_bitmap[j]==EXCLUDE)
				num_of_excludes++;
			else if(pitem_bitmap[j]==INCLUDE)
				num_of_includes++;
			else
				num_of_optionals++;
		}

		if(pattr_bitmap[i]==EXCLUDE)
		{
			if(num_of_excludes<gpAttributes[i].num_of_values)
			{
				sprintf(szerrmsg, "Error: all values of attribute %s should be marked as `exclude'", gpAttributes[i].szattr_name);
				LogErrMsg("", "CheckAttrNValueStatus", szerrmsg);
			}
		}
		else if(pattr_bitmap[i]==INCLUDE)
		{
			if(num_of_includes>1)
			{
				sprintf(szerrmsg, "Error: more than one value of attribute %s are marked as `include'", gpAttributes[i].szattr_name);
				LogErrMsg("", "CheckAttrNValueStatus", szerrmsg);
			}
			else if(num_of_includes==1)
			{
				if(num_of_optionals>0)
				{
					sprintf(szerrmsg, "Error: values of attribute %s should be marked as 'exclude'", gpAttributes[i].szattr_name);
					LogErrMsg("", "CheckAttrNValueStatus", szerrmsg);
				}
			}
			else if(num_of_excludes==gpAttributes[i].num_of_values)
			{
				// The message names the attribute; j is past this attribute's values
				// here, so gpAttrValues[j] must not be used.
				sprintf(szerrmsg, "Error: values of attribute %s cannot all be marked as `exclude'", gpAttributes[i].szattr_name);
				LogErrMsg("", "CheckAttrNValueStatus", szerrmsg);
			}
		}
		else
		{
			if(num_of_includes>0)
			{
				sprintf(szerrmsg, "Error: none of the values of attribute %s should be marked as `include'", gpAttributes[i].szattr_name);
				LogErrMsg("", "CheckAttrNValueStatus", szerrmsg);
			}
			else if(num_of_excludes==gpAttributes[i].num_of_values)
			{
				sprintf(szerrmsg, "Error: values of attribute %s cannot all be marked as `exclude'", gpAttributes[i].szattr_name);
				LogErrMsg("", "CheckAttrNValueStatus", szerrmsg);
			}
		}
	}
}

// Loads the names listed in `szcomp_filename` (one per line) and sets the
// IS_COMPARE bit of the matching entries in `pbitmap`.
//
// Every non-empty line is looked up in `pname2id_map`; when the resulting id is
// below `nmax_id`, IS_COMPARE is OR-ed into pbitmap[id]. Unknown names and
// ids >= nmax_id are logged and skipped. Lines longer than the local buffer are
// truncated.
void LoadCompAttr(char* szcomp_filename, map<string, int> *pname2id_map, char* pbitmap, int nmax_id)
{
	FILE *fp;
	char ch, szname[200], szerrmsg[300];
	int nlen;
	// Longest name that fits in szname, leaving room for the terminator.
	const int nmax_name_len = (int)sizeof(szname)-1;
	map<string, int>::iterator map_it;

	fp = fopen(szcomp_filename, "rt");
	if(fp==NULL)
	{
		// precision bound keeps a long path from overflowing szerrmsg
		sprintf(szerrmsg, "Error: cannot open file %.200s for read\n", szcomp_filename);
		LogErrMsg("", "LoadCompAttr", szerrmsg);
		return;
	}

	ch = fgetc(fp);
	while(!feof(fp))
	{
		// the whole line is the name
		nlen = 0;
		while(!feof(fp) && ch!='\n')
		{
			if(nlen<nmax_name_len)
				szname[nlen++] = ch;
			ch = fgetc(fp);
		}
		szname[nlen] = 0;

		if(szname[0]!=0)
		{
			map_it = pname2id_map->find(szname);
			if(map_it!=pname2id_map->end())
			{
				if(map_it->second<nmax_id)
					pbitmap[map_it->second] |= IS_COMPARE;
				else
				{
					sprintf(szerrmsg, "Error: the id is larger than %d", nmax_id);
					LogErrMsg("", "LoadCompAttr", szerrmsg);
				}
			}
			else
			{
				sprintf(szerrmsg, "Error: cannot find %s", szname);
				LogErrMsg("", "LoadCompAttr", szerrmsg);
			}
		}

		if(ch=='\n')
			ch = fgetc(fp);
	}
	fclose(fp);
}

void CheckCompAttrNItems(char* pattr_bitmap, char* pitem_bitmap)
{
	char szerrmsg[200];
	int i, j, num_of_comp_items, nattr_no;

	for(i=0;i<gnum_of_attrs;i++)
	{
		if(pattr_bitmap[i] & IS_COMPARE)
		{
			num_of_comp_items = 0;
			for(j=gpAttributes[i].nstart_item_id;j<gpAttributes[i].nstart_item_id+gpAttributes[i].num_of_values;j++)
			{
				if(pitem_bitmap[j] & IS_COMPARE)
					num_of_comp_items++;
			}
			if(num_of_comp_items<2)
			{
				sprintf(szerrmsg, "Error: at least 2 items of attribute %s should be selected as comparing items.", gpAttributes[i].szattr_name);
				LogErrMsg("", "CheckCompAttrNItems", szerrmsg);
			}
		}
	}
	
	for(i=0;i<gnum_of_items;i++)
	{
		if(pitem_bitmap[i] & IS_COMPARE)
		{
			nattr_no = gpAttrValues[i].nattr_no;
			if((pattr_bitmap[nattr_no] & IS_COMPARE)==0 )
			{
				sprintf(szerrmsg, "Error: item %s is selected as comparing item but attribute %s is not.", gpAttrValues[i].szattr_value, gpAttributes[nattr_no].szattr_name);
				LogErrMsg("", "CheckCompAttrNItems", szerrmsg);
			}
		}
	}
}


//-----------------------------------------
// Reads the tree statistics text file `szcfp_stat_filename` into the global
// counters.
//
// The file starts with 14 whitespace-separated integers (database size, support
// bounds, attribute/item counts, tree dimensions, ..., target attribute type),
// followed by the target statistics, whose layout depends on the target:
//   - CONTINUOUS / CONTINUOUS_NORMAL: two doubles, stored at the start of a
//     TGT_SUM-sized buffer;
//   - a specific target value (gsztarget_value set): one int, from which gdconf is
//     derived as that count divided by gndb_size;
//   - otherwise: the number of target values followed by one int per value.
// gntgt_attr_type is overwritten with the type stored in the file. gnum_of_attrs
// and gnum_of_items are taken from the file only when they are still 0.
//
// A missing file or a malformed header is reported on stdout; in the latter case
// the function returns without touching the target statistics.
void ReadTreeStatis(char* szcfp_stat_filename)
{
	FILE *fp;
	int num_of_attrs = 0, num_of_items = 0, ntgt_attr_type = 0, i;
	bool bheader_ok;

	fp = fopen(szcfp_stat_filename, "rt");
	if(fp==NULL)
	{
		printf("Error: cannot open file %s for read\n", szcfp_stat_filename);
		return;
	}

	// fixed header: stop at the first field that cannot be read
	bheader_ok = fscanf(fp, "%d", &gndb_size)==1
		&& fscanf(fp, "%d", &gnmin_sup)==1
		&& fscanf(fp, "%d", &gnmax_sup)==1
		&& fscanf(fp, "%d", &num_of_attrs)==1
		&& fscanf(fp, "%d", &num_of_items)==1
		&& fscanf(fp, "%d", &gnum_of_freq_items)==1
		&& fscanf(fp, "%d", &gntree_size)==1
		&& fscanf(fp, "%d", &gntree_page_size)==1
		&& fscanf(fp, "%d", &gnum_of_entries)==1
		&& fscanf(fp, "%d", &gnmax_tree_depth)==1
		&& fscanf(fp, "%d", &gnmax_pattern_len)==1
		&& fscanf(fp, "%d", &gnmax_dfsentries_len)==1
		&& fscanf(fp, "%d", &gnmax_dfs_sup_sum)==1
		&& fscanf(fp, "%d", &ntgt_attr_type)==1;
	if(!bheader_ok)
	{
		printf("Error: incomplete or malformed header in file %s\n", szcfp_stat_filename);
		fclose(fp);
		return;
	}

	// the type recorded in the file takes precedence over the current setting
	gntgt_attr_type = ntgt_attr_type;

	// fall back to the counts from the file only when none were loaded before
	if(gnum_of_attrs==0)
		gnum_of_attrs = num_of_attrs;
	if(gnum_of_items==0)
		gnum_of_items = num_of_items;

	if(gntgt_attr_type==CONTINUOUS || gntgt_attr_type==CONTINUOUS_NORMAL)
	{
		gntgt_stat_size = sizeof(TGT_SUM);
		gptgt_stat_array = NewCharArray(gntgt_stat_size);
		if(fscanf(fp, "%lf %lf", (double*)gptgt_stat_array, (double*)&gptgt_stat_array[sizeof(double)])!=2)
			printf("Error: cannot read the target statistics from file %s\n", szcfp_stat_filename);
	}
	else if(gsztarget_value[0]!=0)
	{
		gntgt_stat_size = sizeof(int);
		gptgt_stat_array = NewCharArray(gntgt_stat_size);
		if(fscanf(fp, "%d", (int*)gptgt_stat_array)!=1)
			printf("Error: cannot read the target statistics from file %s\n", szcfp_stat_filename);
		else
			gdconf = (double)((int*)gptgt_stat_array)[0]/gndb_size;
	}
	else
	{
		// a missing or negative count must not reach the allocator
		if(fscanf(fp, "%d ", &gnum_of_tgt_values)!=1 || gnum_of_tgt_values<0)
		{
			printf("Error: invalid number of target values in file %s\n", szcfp_stat_filename);
			gnum_of_tgt_values = 0;
			fclose(fp);
			return;
		}
		gntgt_stat_size = sizeof(int)*gnum_of_tgt_values;
		gptgt_stat_array = NewCharArray(gntgt_stat_size);
		for(i=0;i<gnum_of_tgt_values;i++)
		{
			if(fscanf(fp, "%d ", (int*)&gptgt_stat_array[i*sizeof(int)])!=1)
			{
				printf("Error: cannot read the target statistics from file %s\n", szcfp_stat_filename);
				break;
			}
		}
	}

	fclose(fp);
}

