#include "global.h"

#include <fstream>
#include <ctime>
#include <iostream>

// Marks exactly count/2 distinct positions in [0, count) as chosen, picking
// them with rand() by rejection sampling (a draw that hits an already chosen
// position is simply retried). Returns one flag per position.
static std::vector<bool> choose_random_half(int count)
{
	std::vector<bool> chosen(count, false);
	const int half = count/2;
	for(int picked=0; picked<half; ) {
		const int index = int(rand()/(RAND_MAX+1.0)*count);
		if(chosen[index]) continue;
		chosen[index] = true;
		picked++;
	}
	return chosen;
}

// Splits "<szdataset_name>.data" into two files, "<szoutput_name>_0.data" and
// "<szoutput_name>_1.data".
//
// Every non-empty input line is one instance. Instances whose first character
// is '1' form one group and all other instances form a second group; a random
// half (size/2, rounded down) of each group goes to file 0 and the rest goes
// to file 1. Within each output file the '1' group is written first, and the
// original relative order inside a group is kept.
//
// mapping is resized to the number of instances; mapping[i] receives the
// position of input instance i in the concatenation "file 0 followed by
// file 1" (0-based).
//
// The instance count and the sizes of the two groups are printed to std::cout.
// If the input file cannot be opened, an error is written to std::cerr,
// mapping is cleared and no output file is produced.
void partition(char *szdataset_name, char *szoutput_name, std::vector<int> &mapping)
{
	// Paths are built as std::string so that long names cannot overflow a
	// fixed-size buffer.
	const std::string in_path = std::string(szdataset_name) + ".data";
	std::ifstream in(in_path.c_str(), std::ios::in);
	if(!in) {
		std::cerr << "partition: cannot open " << in_path << std::endl;
		mapping.clear();
		return;
	}

	// Looping on getline() itself (rather than on !eof()) terminates on any
	// stream failure, not only on end-of-file.
	std::vector<std::string> instances;
	std::string line;
	while(std::getline(in, line)) {
		if(line.empty()) continue;
		instances.push_back(line);
	}
	in.close();

	std::cout << instances.size() <<std::endl;

	// Lines are never empty here, so reading the first character is safe.
	std::vector<int> pos, neg;
	for(std::size_t i=0; i<instances.size(); i++) {
		if(instances[i][0]=='1') pos.push_back(int(i));
		else neg.push_back(int(i));
	}
	const int num_of_pos = int(pos.size());
	const int num_of_neg = int(neg.size());

	std::cout << pos.size() << " " << neg.size() << std::endl;

	mapping.resize(instances.size());

	srand(clock());

	// The non-'1' group is sampled before the '1' group.
	const std::vector<bool> chosen_neg = choose_random_half(num_of_neg);
	const std::vector<bool> chosen_pos = choose_random_half(num_of_pos);

	std::vector<int> file1, file2;
	for(int i=0; i<num_of_pos; i++) {
		if(chosen_pos[i]) file1.push_back(pos[i]);
		else file2.push_back(pos[i]);
	}
	for(int i=0; i<num_of_neg; i++) {
		if(chosen_neg[i]) file1.push_back(neg[i]);
		else file2.push_back(neg[i]);
	}

	// Positions in file 1 are numbered after all positions of file 0.
	const int offset = int(file1.size());
	for(std::size_t k=0; k<file1.size(); k++)
		mapping[file1[k]] = int(k);
	for(std::size_t k=0; k<file2.size(); k++)
		mapping[file2[k]] = int(k)+offset;

	//output
	const std::string out_base(szoutput_name);

	const std::string first_path = out_base + "_0.data";
	std::ofstream f_out(first_path.c_str(), std::ios::out);
	if(!f_out)
		std::cerr << "partition: cannot write " << first_path << std::endl;
	for(std::size_t k=0; k<file1.size(); k++)
		f_out << instances[file1[k]] << '\n';
	f_out.close();

	const std::string second_path = out_base + "_1.data";
	std::ofstream s_out(second_path.c_str(), std::ios::out);
	if(!s_out)
		std::cerr << "partition: cannot write " << second_path << std::endl;
	for(std::size_t k=0; k<file2.size(); k++)
		s_out << instances[file2[k]] << '\n';
	s_out.close();
}

// Rewrites "<szdataset_name>.tid" into "<szoutput_name>.tid".
//
// Each non-empty input line is split on ' ' with split(). The first token is
// copied unchanged; every following token is converted with atoi() and
// replaced by mapping[value]. Every written token is followed by a single
// space and every output line ends with a newline.
//
// A value that is negative or not smaller than mapping.size() is reported on
// std::cerr and left out of the output instead of indexing past the end of
// mapping. If the input or the output file cannot be opened, an error is
// written to std::cerr and the function returns.
void map_tid(char *szdataset_name, char *szoutput_name, std::vector<int> &mapping)
{
	// Paths are built as std::string so that long names cannot overflow a
	// fixed-size buffer.
	const std::string in_path = std::string(szdataset_name) + ".tid";
	const std::string out_path = std::string(szoutput_name) + ".tid";

	std::ifstream in(in_path.c_str(), std::ios::in);
	if(!in) {
		std::cerr << "map_tid: cannot open " << in_path << std::endl;
		return;
	}
	std::ofstream out(out_path.c_str(), std::ios::out);
	if(!out) {
		std::cerr << "map_tid: cannot write " << out_path << std::endl;
		return;
	}

	// Looping on getline() itself (rather than on !eof()) terminates on any
	// stream failure, not only on end-of-file.
	std::string line;
	while(std::getline(in, line)) {
		if(line.empty()) continue;

		std::vector<std::string> tokens;
		split(line, tokens, ' ');
		if(tokens.empty()) continue;

		out << tokens[0] << " ";
		for(std::size_t i=1; i<tokens.size(); i++) {
			const int id = atoi(tokens[i].c_str());
			if(id<0 || std::size_t(id)>=mapping.size()) {
				std::cerr << "map_tid: id " << tokens[i]
				          << " is outside the mapping, skipped" << std::endl;
				continue;
			}
			out << mapping[id] << " ";
		}
		out << '\n';
	}
	in.close();
	out.close();
}

// Copies "<szdataset_name>.names" to "<szoutput_name>.names", replacing the
// destination if it exists.
//
// The bytes are copied through file streams rather than by running a shell
// "copy" command, so the function does not depend on a particular shell, is
// not confused by spaces or shell metacharacters in the names, and has no
// fixed-size command buffer to overflow.
//
// Nothing is done when both names produce the same path. If either file
// cannot be opened, an error is written to std::cerr.
void copy_namefile(char *szdataset_name, char *szoutput_name)
{
	const std::string src_path = std::string(szdataset_name) + ".names";
	const std::string dst_path = std::string(szoutput_name) + ".names";

	// Opening the destination truncates it, which would wipe the source
	// when both paths are the same.
	if(src_path == dst_path) return;

	std::ifstream src(src_path.c_str(), std::ios::in | std::ios::binary);
	if(!src) {
		std::cerr << "copy_namefile: cannot open " << src_path << std::endl;
		return;
	}
	std::ofstream dst(dst_path.c_str(), std::ios::out | std::ios::binary | std::ios::trunc);
	if(!dst) {
		std::cerr << "copy_namefile: cannot write " << dst_path << std::endl;
		return;
	}

	// Streams the whole source; an empty source leaves an empty destination.
	dst << src.rdbuf();
}

// Copies "<szdataset_name>.rules" to "<szoutput_name>.rules", replacing the
// destination if it exists.
//
// The bytes are copied through file streams rather than by running a shell
// "copy" command, so the function does not depend on a particular shell, is
// not confused by spaces or shell metacharacters in the names, and has no
// fixed-size command buffer to overflow.
//
// Nothing is done when both names produce the same path. If either file
// cannot be opened, an error is written to std::cerr.
void copy_rulefile(char *szdataset_name, char *szoutput_name)
{
	const std::string src_path = std::string(szdataset_name) + ".rules";
	const std::string dst_path = std::string(szoutput_name) + ".rules";

	// Opening the destination truncates it, which would wipe the source
	// when both paths are the same.
	if(src_path == dst_path) return;

	std::ifstream src(src_path.c_str(), std::ios::in | std::ios::binary);
	if(!src) {
		std::cerr << "copy_rulefile: cannot open " << src_path << std::endl;
		return;
	}
	std::ofstream dst(dst_path.c_str(), std::ios::out | std::ios::binary | std::ios::trunc);
	if(!dst) {
		std::cerr << "copy_rulefile: cannot write " << dst_path << std::endl;
		return;
	}

	// Streams the whole source; an empty source leaves an empty destination.
	dst << src.rdbuf();
}

// Hands the fixed command line `del /Q .\datasets\*` to the system shell.
// The command's exit status is not inspected.
void rm_data()
{
	static const char delete_command[] = "del /Q .\\datasets\\*";
	system(delete_command);
}
