package sg.edu.nus;

/*
 * Author: Zhengkui Wang
 * 
 * National University of Singapore
 */

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;

import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.*;

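/*
 * Two-locus data format. The second row gives the width of each field in
 * bytes (5 + 5 + 5 + 1 + 1 + 1 + 334 = 352 bytes per record):
 * |first_snp|second_snp|x^2 value|first_GT|second_GT|PT|sampleid_bit_list|
 * |5|5|5|1|1|1|334|
 */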

/**
 * Mapper that keeps a local "top k" of two-locus SNP pairs, ranked by their
 * x^2 value, and emits the surviving records from {@link #close()}.
 *
 * <p>
 * Records that belong to the same (first SNP, second SNP) pair are expected to
 * arrive consecutively: they are accumulated in {@link #newTuple} and the
 * accumulated collection is offered to {@link #topkMap} when the pair changes
 * (or when the mapper is closed).
 *
 * <p>
 * {@link #topkMap} is keyed by x^2 value and holds at most {@code topK}
 * distinct x^2 values. Pairs that tie on an x^2 value already stored are merged
 * into the stored collection instead of overwriting it, so no qualifying
 * record is silently discarded.
 */
public class TopKMiningMapper extends MapReduceBase implements
		Mapper<LongWritable, Text, IntWritable, SingleTwoLocusData> {
	/** Record length that map() reports a mismatch against. */
	private static final int EXPECTED_RECORD_LENGTH = 352;
	/** Bytes read after the sample-id bit list: 1 + 1 + 1 + 5 + 5 + 5. */
	private static final int TRAILER_LENGTH = 18;

	// number of distinct x^2 values to keep, read from "topk.num"
	int topK = 0;
	// this one is used to store the local top k two-locus data
	// Hash<x^2 value, the two-locus data information>
	HashMap<Integer, TwoLocusDataCollection> topkMap = new HashMap<Integer, TwoLocusDataCollection>();
	// number of entries currently held in topkMap
	int numOfTuples = 0;
	// smallest key in topkMap; Integer.MAX_VALUE while the map is empty
	int minValue = Integer.MAX_VALUE;
	byte[] firstSnp = new byte[5];
	byte[] secondSnp = new byte[5];
	byte[] x2Value = new byte[5];
	byte PT;
	// private copy of the bytes of the record currently being processed
	byte[] valueArray;
	// fields of the most recently parsed record
	int iFirstSnp = 0;
	int iSecondSnp = 0;
	int iFirstSnpValue = 0;
	int iSecondSnpValue = 0;
	int iPhenotype = 0;
	int iX2Value = 0;
	// offset at which the trailing fields start (length of the bit list)
	int idsBitLength = 334;
	IntWritable outputKey = new IntWritable();
	// collection for the SNP pair currently being accumulated; null when the
	// current pair does not qualify for the top k
	TwoLocusDataCollection newTuple = null;
	// collector remembered from map() so that close() can emit the result
	private OutputCollector<IntWritable, SingleTwoLocusData> localOutput = null;

	/**
	 * Reads the number of entries to keep from the job property
	 * {@code topk.num} (default 1).
	 */
	@Override
	public void configure(JobConf job) {
		this.topK = job.getInt("topk.num", 1);
	}

	/**
	 * Parses one record and folds it into the local top-k state. Nothing is
	 * emitted here; output happens in {@link #close()}.
	 *
	 * @param key      input key, unused
	 * @param value    one raw two-locus record
	 * @param output   collector, remembered for {@link #close()}
	 * @param reporter unused
	 */
	@Override
	public void map(LongWritable key, Text value,
			OutputCollector<IntWritable, SingleTwoLocusData> output,
			Reporter reporter) {
		this.localOutput = output;

		// Text.getBytes() exposes the backing array, which may be longer than
		// the record; only getLength() bytes are valid.
		int recordLength = value.getLength();
		if (recordLength != EXPECTED_RECORD_LENGTH)
			System.out.println("the length is not 352");
		if (recordLength < this.idsBitLength + TRAILER_LENGTH) {
			// too short to contain the trailing fields; cannot be parsed
			return;
		}

		// The framework reuses the Text buffer between calls, so every record
		// that may be retained until close() needs its own copy of the bytes.
		valueArray = new byte[recordLength];
		System.arraycopy(value.getBytes(), 0, valueArray, 0, recordLength);
		SingleTwoLocusData newRecord = makeNewSingleTwoLocus(valueArray);

		if (newTuple != null && newTuple.getFirstSnp() == this.iFirstSnp
				&& newTuple.getScondSnp() == this.iSecondSnp) {
			// same SNP pair as the one being accumulated
			newTuple.addNewTuple(newRecord);
			return;
		}

		// A different pair starts here: settle the previous one first so that
		// minValue is up to date, then decide whether the new pair is worth
		// accumulating.
		commitPending();
		if (qualifies(this.iX2Value)) {
			newTuple = new TwoLocusDataCollection(this.iFirstSnp,
					this.iSecondSnp, this.iX2Value, newRecord);
		}
	}

	/*
	 * Out put the local top k two-locus SNPs information from the topkmap list
	 */
	ArrayList<SingleTwoLocusData> hashArrayList;

	/**
	 * Settles the pair still being accumulated and emits every record of every
	 * retained collection, keyed by the collection's x^2 value.
	 *
	 * @throws IOException if the collector fails to accept a record
	 */
	@Override
	public void close() throws IOException {
		commitPending();
		if (this.localOutput == null) {
			// map() was never called, so there is nothing to emit
			return;
		}
		for (TwoLocusDataCollection collection : this.topkMap.values()) {
			hashArrayList = collection.getValueList();
			outputKey.set(collection.getX2Value());
			for (SingleTwoLocusData record : hashArrayList) {
				this.localOutput.collect(outputKey, record);
			}
		}
	}

	/*
	 * parse the two-locus data information from the whole bytes list Store it
	 * into one singleTwoLocusData object
	 */
	/**
	 * Parses the fields that follow the first {@code idsBitLength} bytes of
	 * {@code valueArray}, stores them in the {@code i*} fields of this mapper
	 * and returns them wrapped in a new {@link SingleTwoLocusData}.
	 *
	 * <p>
	 * Offsets relative to {@code idsBitLength}: 0 second SNP value, 1 first SNP
	 * value, 2 phenotype, 3-7 x^2 value, 8-12 second SNP, 13-17 first SNP.
	 *
	 * @param valueArray record bytes, at least {@code idsBitLength + 18} long
	 * @return the parsed record, carrying {@code valueArray} itself
	 */
	SingleTwoLocusData makeNewSingleTwoLocus(byte[] valueArray) {
		final int base = this.idsBitLength;
		System.arraycopy(valueArray, base + 13, firstSnp, 0, 5);
		System.arraycopy(valueArray, base + 8, secondSnp, 0, 5);
		System.arraycopy(valueArray, base + 3, x2Value, 0, 5);

		this.iFirstSnp = Converter.byteToInt2(firstSnp);
		this.iSecondSnp = Converter.byteToInt2(secondSnp);
		this.iX2Value = Converter.byteToInt2(x2Value);
		this.iFirstSnpValue = Converter.singleByteToInt(valueArray[base + 1]);
		this.iSecondSnpValue = Converter.singleByteToInt(valueArray[base]);
		this.iPhenotype = Converter.singleByteToInt(valueArray[base + 2]);

		SingleTwoLocusData tmpSingle = new SingleTwoLocusData();
		tmpSingle.setAllValues(this.iFirstSnp, this.iSecondSnp,
				this.iFirstSnpValue, this.iSecondSnpValue, this.iX2Value,
				this.iPhenotype, valueArray);
		return tmpSingle;
	}

	/**
	 * Returns the smallest key of {@code topkmap}.
	 *
	 * @param topkmap map keyed by x^2 value
	 * @return the smallest key, or {@code Integer.MAX_VALUE} for an empty map
	 */
	int getMinValue(HashMap<Integer, TwoLocusDataCollection> topkmap) {
		// Start from the largest int so that any real key replaces it; seeding
		// with 0 would never be replaced by a non-negative x^2 value.
		int smallest = Integer.MAX_VALUE;
		for (Integer candidate : topkmap.keySet()) {
			if (candidate.intValue() < smallest) {
				smallest = candidate.intValue();
			}
		}
		return smallest;
	}

	/**
	 * Tells whether a pair with the given x^2 value could enter the top k: there
	 * is still room, it beats the current minimum, or it ties with a value that
	 * is already retained.
	 */
	private boolean qualifies(int x2) {
		return this.topkMap.size() < this.topK || x2 > this.minValue
				|| this.topkMap.containsKey(x2);
	}

	/**
	 * Offers the collection being accumulated to the top-k map and clears
	 * {@link #newTuple}, so that a collection is never committed twice.
	 */
	private void commitPending() {
		TwoLocusDataCollection pending = this.newTuple;
		this.newTuple = null;
		if (pending == null) {
			return;
		}
		int x2 = pending.getX2Value();

		TwoLocusDataCollection tied = this.topkMap.get(x2);
		if (tied != null) {
			// Another pair with the same x^2 value is already stored: merge the
			// records instead of overwriting the stored pair.
			ArrayList<SingleTwoLocusData> records = pending.getValueList();
			for (SingleTwoLocusData record : records) {
				tied.addNewTuple(record);
			}
			return;
		}

		if (this.topkMap.size() >= this.topK) {
			if (x2 <= this.minValue) {
				// full, and not better than the current minimum
				return;
			}
			this.topkMap.remove(this.minValue);
		}
		this.topkMap.put(x2, pending);
		this.numOfTuples = this.topkMap.size();
		this.minValue = getMinValue(this.topkMap);
	}
}
