package sg.edu.nus;

/*
 * author: Zhengkui Wang
 * 
 * National University of Singapore
 */
import java.io.*;
import java.util.*;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapred.*;
import org.apache.hadoop.util.*;

/*********************************************
 * The mapper for the data pre-processing part
 **********************************************/
public class PreprocessMapper extends MapReduceBase implements
		Mapper<LongWritable, Text, Text, Text> {

	private Text value = new Text();
	private Text key = new Text();

	public void map(LongWritable key, Text value,
			OutputCollector<Text, Text> output, Reporter reporter) {
		processTuple(key.toString(), value.toString(), output);
	}

	private void processTuple(String sKey, String sValue,
			OutputCollector<Text, Text> output) // throws Exception
	{

		String sToken = null;
		if ((sValue == null) || (sValue.length() == 0)
				|| (sValue.trim() == null) || (sKey == null)
				|| (sKey.length() == 0))
			return;
		String[] str = sValue.split("\t");
		int disease = Integer.parseInt(str[str.length - 1]);
		String sampleId = str[0];
		try {
			for (int i = 1; i < str.length - 1; i++) {
				key.set(String.valueOf(i) + " " + disease + " " + str[i]);
				value.set(sampleId);
				output.collect(key, value);
			}

		}// end of try block
		catch (IOException exc) {
			System.err.println("An error occured [" + exc.getMessage() + "]");
		}
	}// end of ProcessTuple function
}
