package sg.edu.nus;

/*
 * author: Zhengkui Wang
 * 
 * National University of Singapore
 */

import org.apache.hadoop.fs.*;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapred.*;
import org.apache.hadoop.util.*;

/*=============================================================
 *  This file configures and launches the analysis jobs: data preprocessing,
 *  two-locus analysis, top-k retrieval and three-locus analysis.
 *  Edit the settings in run() to choose which analyses are performed.
 =================================================================*/

public class GeneProcessor extends Configured implements Tool {

	/**
	 * Configures and runs the analysis jobs one after another: data
	 * pre-processing, two-locus analysis (Greedy model), top-k retrieval over
	 * the two-locus result, and three-locus analysis.
	 *
	 * Each job is submitted with JobClient.runJob before the next one is
	 * configured; later jobs read the output paths written by earlier ones.
	 *
	 * @param args five paths, in this order:
	 *             args[0] input of the pre-processing job,
	 *             args[1] output of the pre-processing job (input of two-locus),
	 *             args[2] output of the two-locus job (input of top-k and three-locus),
	 *             args[3] output of the three-locus job,
	 *             args[4] output of the top-k job.
	 *             Extra arguments are ignored.
	 * @return 0 after all jobs have been run, -1 if fewer than five paths are given
	 * @throws Exception whatever job configuration or JobClient.runJob throws
	 */
	public int run(String[] args) throws Exception {

		// All five paths are needed up front; fail with a usage message instead
		// of an ArrayIndexOutOfBoundsException.
		if (args == null || args.length < 5) {
			System.err.println("Usage: GeneProcessor <input> <preprocess output> "
					+ "<two-locus output> <three-locus output> <top-k output>");
			return -1;
		}

		/********************************************************
		 * Choose the statistic method which you want to use
		 * 1 for Chi-square test (CS)
		 * 2 for Likelihood Ratio test (LR)
		 * 3 for Normal Mutual Information (NMI)
		 * 4 for Uncertainty Coefficient (UC)
		 ********************************************************/
		int statisticMethod = 1;

		// Number of SNPs in the data set; passed to the analysis jobs as "snp.num".
		int snpNum = 100;

		Path preprocessInputPath = new Path(args[0]);
		Path preprocessOutputPath = new Path(args[1]);
		Path twoSnpsOutputPath = new Path(args[2]);
		Path threeSnpsOutPutPath = new Path(args[3]);
		Path topKOutputPath = new Path(args[4]);

		/********************************************************
		 * Job setting for the data pre-processing job
		 ********************************************************/
		JobConf preProcess = new JobConf(getConf(), getClass());
		preProcess.setJobName("GeneProcessing program for " + snpNum + " SNP");
		preProcess.setOutputKeyClass(NullWritable.class);
		preProcess.setOutputValueClass(Text.class);
		preProcess.setInputFormat(TextInputFormat.class);
		preProcess.setOutputFormat(TextOutputFormat.class);
		preProcess.setMapOutputKeyClass(Text.class);
		preProcess.setMapOutputValueClass(Text.class);
		preProcess.setMapperClass(PreprocessMapper.class);
		preProcess.setReducerClass(PreprocessReducer.class);
		//preProcess.setNumMapTasks(4);
		preProcess.setNumReduceTasks(3);
		FileInputFormat.setInputPaths(preProcess, preprocessInputPath);
		FileOutputFormat.setOutputPath(preProcess, preprocessOutputPath);
		JobClient.runJob(preProcess);

		/*************************************************************
		 * Job setting for the two-locus analysis job in Greedy Model
		 **************************************************************/
		JobConf twoLocus = new JobConf(getConf(), getClass());
		int reducerNum = 3; // how many reducers in the application to use
		// Comma-separated split values, handed to the job through the "splittor" setting.
		String splittor = CaculatorSplittor(snpNum, reducerNum);
		twoLocus.setJobName("Two SNPS Combination program for " + snpNum + " SNPs "
				+ reducerNum + " reducer");
		twoLocus.setInputFormat(TextInputFormat.class);
		twoLocus.setOutputFormat(TextOutputFormat.class);
		twoLocus.setMapOutputKeyClass(DoubleWritable.class);
		twoLocus.setMapOutputValueClass(Text.class);
		twoLocus.set("mapred.child.java.opts", "-Xmx1300m");
		twoLocus.setInt("statistic.method", statisticMethod);
		twoLocus.setInt("snp.num", snpNum);
		twoLocus.setInt("reducer.num", reducerNum);
		twoLocus.set("splittor", splittor);
		twoLocus.setNumReduceTasks(reducerNum);
		twoLocus.setMapperClass(TwoSnpsMapperGreedy.class);
		twoLocus.setReducerClass(TwoSnpsReducerGreedy.class);
		twoLocus.setPartitionerClass(TwoSnpsPartitionerGreedy.class);
		FileInputFormat.setInputPaths(twoLocus, preprocessOutputPath);
		FileOutputFormat.setOutputPath(twoLocus, twoSnpsOutputPath);
		JobClient.runJob(twoLocus);

		/*********************************************************************
		 * Job setting for the two-locus analysis job in square-chopping Model
		 * (alternative to the Greedy block above; kept disabled)
		 ***********************************************************************/
	/* 	JobConf twoLocus = new JobConf(getConf(), getClass());
		int snpNum=100;
		int partitionNum=2;
		//String splittor=CaculatorSplittor(snpNum,reducerNum);
		twoLocus.setJobName("Two SNPS Combination program for"+snpNum+ "SNPs " +  partitionNum+ " partitions");
		twoLocus.setOutputValueClass(Text.class);
		twoLocus.setInputFormat(TextInputFormat.class);
		twoLocus.setOutputFormat(TextOutputFormat.class);
		twoLocus.setMapOutputKeyClass(Text.class);
		twoLocus.set("mapred.child.java.opts", "-Xmx1200m");
		twoLocus.setInt("snp.num", snpNum);
		twoLocus.setInt("partition.num", partitionNum);		 
		twoLocus.setNumReduceTasks(partitionNum*(partitionNum+1)/2);
		twoLocus.setMapperClass(TwoSnpsMapperSquareChopping.class);
		twoLocus.setReducerClass(TwoSnpsReducerSquareChopping.class);
		twoLocus.setPartitionerClass(TwoSnpsPartitionerSquareChopping.class);
		FileInputFormat.setInputPaths(twoLocus, preprocessOutputPath);
		FileOutputFormat.setOutputPath(twoLocus, twoSnpsOutputPath);
	 	JobClient.runJob(twoLocus);
		*/

		/*************************************************************************
		 * Job setting for the top k retrieval job from two-locus analysis result
		 **************************************************************************/
		JobConf topK = new JobConf(getConf(), getClass());
		int numTopK = 10;
		int reducerTopKNum = 1;  // how many reducers in the application to use
		topK.setJobName("Top K mining program");
		topK.setOutputValueClass(Text.class);
		topK.setOutputKeyClass(Text.class);
		topK.setInputFormat(TextInputFormat.class);
		topK.setMapOutputKeyClass(IntWritable.class);
		topK.setMapOutputValueClass(SingleTwoLocusData.class);
		topK.setOutputFormat(TextOutputFormat.class);
		topK.set("mapred.child.java.opts", "-Xmx1200m");
		topK.setInt("statistic.method", statisticMethod);
		topK.setInt("topk.num", numTopK);
		topK.setNumReduceTasks(reducerTopKNum);
		topK.setMapperClass(TopKMiningMapper.class);
		topK.setReducerClass(TopKMingReducer.class);
		topK.setOutputKeyComparatorClass(TopKDesSort.class);
		FileInputFormat.setInputPaths(topK, twoSnpsOutputPath);
		FileOutputFormat.setOutputPath(topK, topKOutputPath);
		JobClient.runJob(topK);

		/***********************************************************
		 * Job setting for the three-locus analysis job
		 *************************************************************/
		JobConf threeLocus = new JobConf(getConf(), getClass());
		int reducer3 = 4; // how many reducers in the application
		threeLocus.setJobName("Three SNPS Combination program for " + snpNum + " SNPs "
				+ reducer3 + " reducers");
		threeLocus.setInputFormat(TextInputFormat.class);
		threeLocus.setOutputValueClass(Text.class);
		threeLocus.setMapOutputKeyClass(DoubleWritable.class);
		threeLocus.setOutputFormat(TextOutputFormat.class);
		threeLocus.set("mapred.child.java.opts", "-Xmx1200m");
		threeLocus.setInt("snp.num", snpNum);
		threeLocus.setInt("reducer.num", reducer3);
		threeLocus.setNumReduceTasks(reducer3);
		String splittor3 = CaculatorSplittor(snpNum, reducer3);
		threeLocus.set("splittor", splittor3);
		threeLocus.setMapperClass(ThreeSnpsMapper.class);
		threeLocus.setReducerClass(ThreeSnpsReducer.class);
		threeLocus.setPartitionerClass(ThreeSnpsPartitioner.class);
		// The three-locus job reads the two-locus output, not the raw pre-processed data.
		FileInputFormat.setInputPaths(threeLocus, twoSnpsOutputPath);
		FileOutputFormat.setOutputPath(threeLocus, threeSnpsOutPutPath);
		JobClient.runJob(threeLocus);

		return 0;
	}

	/**
	 * Builds the "splittor" string for a job: the split values computed by
	 * loadingBalancing for the given SNP and reducer counts, joined with commas
	 * by getSplittor.
	 *
	 * @param snpnum     number of SNPs
	 * @param reducernum number of reducers the rows are distributed over
	 * @return the split values as a comma-separated string
	 */
	private String CaculatorSplittor(int snpnum, int reducernum) {
		final long pairCount = CombinationNum(snpnum);
		final long[] boundaries = loadingBalancing(pairCount, reducernum, snpnum);
		return getSplittor(boundaries);
	}
	/**
	 * Joins the given split values into a single comma-separated string,
	 * e.g. {19, 44, 100} becomes "19,44,100".
	 *
	 * @param split the values to join; must not be null
	 * @return the values separated by "," with no surrounding whitespace,
	 *         or the empty string when the array is empty
	 */
	static String getSplittor(long [] split)
	{
		// An empty array has no first element; return an empty list rather than
		// failing on split[0].
		if (split.length == 0)
		{
			return "";
		}
		// StringBuilder avoids re-copying the whole string on every append.
		StringBuilder joined = new StringBuilder();
		joined.append(split[0]);
		for (int i = 1; i < split.length; i++)
		{
			joined.append(',').append(split[i]);
		}
		return joined.toString();
	}
	
	/**
	 * Load balancing algorithm for the Greedy model.
	 *
	 * Walks the rows i = 1, 2, ... where row i contributes (snpnum - i) units of
	 * work (presumably the pairs SNP i forms with later SNPs -- confirm against
	 * the mapper), and accumulates rows for the current reducer until its share
	 * reaches the per-reducer threshold totalcom / renum. The row counter at
	 * which that happens is stored as the reducer's split value. Reducers that
	 * did not receive a value this way share the remaining rows evenly, and the
	 * last entry is always snpnum.
	 *
	 * How the split values are interpreted (inclusive or exclusive bounds) is up
	 * to the partitioner that consumes them, which is not part of this method.
	 *
	 * @param totalcom total amount of work to distribute
	 * @param renum    number of reducers; must be at least 1
	 * @param snpnum   number of SNPs (rows)
	 * @return an array of renum split values, the last one equal to snpnum
	 * @throws IllegalArgumentException if renum is not positive
	 */
	static long [] loadingBalancing(long totalcom, int renum, int snpnum)
	{
		if (renum <= 0)
		{
			throw new IllegalArgumentException("reducer number must be positive: " + renum);
		}

		// Integer division on purpose: the quotient was always truncated before
		// (the former Math.ceil around it had no effect), and keeping it
		// truncated leaves existing split points where they are.
		long avg = totalcom / renum;
		long [] split = new long[renum];
		long subTotal = 0;
		int reducer = 0;

		// Greedy pass: only the first renum-1 reducers get a computed boundary;
		// the last one always ends at snpnum.
		for (int i = 1, j = snpnum - 1; reducer < renum - 1 && j > 0; ++i, j = snpnum - i)
		{
			if (subTotal + j < avg)
			{
				subTotal += j;
			}
			else
			{
				// Row i pushes this reducer to the threshold: close it here.
				split[reducer] = i;
				reducer++;
				subTotal = 0;
			}
		}

		// Boundary of the last reducer closed by the greedy pass; 0 when none
		// was closed (e.g. a single reducer), which previously indexed split[-1].
		long previous = (reducer == 0) ? 0 : split[reducer - 1];
		// reducer never exceeds renum-1 here, so the divisor is at least 1.
		long spend = (snpnum - previous) / (renum - reducer);
		for (; reducer < renum; reducer++)
		{
			if (reducer != renum - 1)
			{
				previous += spend;
				split[reducer] = previous;
			}
			else
			{
				split[reducer] = snpnum;
			}
		}
		return split;
	}
	/**
	 * Calculates the number of unordered pairs that can be formed from n SNPs,
	 * i.e. n * (n - 1) / 2.
	 *
	 * @param n number of SNPs
	 * @return the number of pairs; 0 when n is less than 2
	 */
	static long CombinationNum(int n)
	{
		if (n < 2)
		{
			return 0;
		}
		// Multiply before halving: n * (n - 1) is always even, whereas halving n
		// first truncates for odd n (e.g. 5 would yield 8 instead of 10).
		// The cast widens to long before the multiplication so it cannot overflow.
		return (long) n * (n - 1) / 2;
	}
	/**
	 * Formats a duration given in milliseconds as an "Elapsed Time" message in
	 * whole seconds. Despite its name, this method only builds the string; it
	 * does not print anything.
	 *
	 * @param duration elapsed time in milliseconds
	 * @return "Elapsed Time = N sec", where N is duration / 1000 with the
	 *         fractional part discarded
	 */
	public static String printComputationDurationInSeconds(long duration) {
		StringBuilder message = new StringBuilder("Elapsed Time = ");
		message.append(duration / 1000);
		message.append(" sec");
		return message.toString();
	}

	/**
	 * Program entry point: runs GeneProcessor through ToolRunner, prints the
	 * total wall-clock time of the run, and exits with the code returned by
	 * the tool.
	 *
	 * @param args command-line arguments, passed on to ToolRunner
	 * @throws Exception whatever ToolRunner.run throws
	 */
	public static void main(String[] args) throws Exception {
		final long startedAt = System.currentTimeMillis();
		final int exitCode = ToolRunner.run(new GeneProcessor(), args);
		final long elapsedMillis = System.currentTimeMillis() - startedAt;

		System.out.println("The total execution time is: "
				+ printComputationDurationInSeconds(elapsedMillis));
		System.exit(exitCode);
	}
	


}
