//-----------------------------------------------
// Copyright 2016 Guangxi University
// Written by Liang Zhao(S080011@e.ntu.edu.sg)
// Released under the GPL
//-----------------------------------------------
//
// candidate - Obtain the candidate erroneous bases of input reads
// It was developed based on SGA, originally written by Jared Simpson (js18@sanger.ac.uk)
//
#include <iostream>
#include <fstream>
#include <sstream>
#include <iterator>
#include "Util.h"
#include "candidate.h"
#include "SuffixArray.h"
#include "BWT.h"
#include "MECCommon.h"
#include "Timer.h"
#include "BWTAlgorithms.h"
#include "gzstream.h"
#include "SequenceProcessFramework.h"
#include "CandidateProcess.h"
#include "BWTIntervalCache.h"
#include "Mask.h"

//
// Getopt
//
// Name of this subprogram as typed on the command line. It is spliced into
// the version, usage and identification strings below and into the
// diagnostics printed while parsing the options.
#define SUBPROGRAM "candidate"

// Text printed for --version.
static const char *CANDIDATE_VERSION_MESSAGE =
SUBPROGRAM " Version " PACKAGE_VERSION "\n";

// Text printed for --help and after a command line error.
// The defaults quoted here must match the initial values in namespace opt and
// the fallbacks applied in parseCandidateOptions (the output file falls back
// to the stripped reads file name followed by ".cand").
static const char *CANDIDATE_USAGE_MESSAGE =
"Usage: " PACKAGE_NAME " " SUBPROGRAM " [OPTION] ... READSFILE\n"
"Obtain the candidate erroneous bases in READSFILE\n"
"\n"
"    --help                   display this help and exit\n"
"    -p, --prefix=PREFIX      use PREFIX for the names of the index files (default: prefix of the input file)\n"
"    -o, --outfile=FILE       write the corrected reads to FILE (default: prefix of the input file + .cand)\n"
"    -t, --threads=NUM        use NUM threads for the computation (default: 1)\n"
"    -d, --sample-rate=N      use occurrence array sample rate of N in the FM-index. Higher values use significantly\n"
"                             less memory at the cost of higher runtime. This value must be a power of 2 (default: 128)\n"
"    -k, --kmer-size=N        The length of the kmer used to seeding reads. (default: 23)\n"
"    -e, --error-rate=F       the maximum error rate allowed between two sequences to consider them overlapped (default: 0.08)\n"
"    -m, --min-overlap=LEN    minimum overlap required between two reads (default: 25)\n"
"\nReport bugs to " PACKAGE_BUGREPORT "\n\n";

// Identification string handed to the Timer created in candidateMain.
static const char* PROGRAM_IDENT =
PACKAGE_NAME "::" SUBPROGRAM;

// Settings of the subprogram, private to this translation unit.
// Each entry starts at its built-in default. parseCandidateOptions fills in
// the file names and overwrites the values that have a command line option;
// intervalCacheLength and algorithm are never changed by it.
namespace opt
{
    // Input: the reads file and the companion ".dim" file name derived from
    // it, which is passed to Mask::initReadsMask.
    static std::string readsFile;
    static std::string readsFileDim;

    // Index prefix and output locations. An empty discardFile means that no
    // discard writer is created.
    static std::string prefix;
    static std::string outFile;
    static std::string discardFile = "";

    // Index loading and execution.
    static int sampleRate = BWT::DEFAULT_SAMPLE_RATE_SMALL;
    static int intervalCacheLength = 10;
    static int numThreads = 1;

    // Parameters copied into CandidateParameters.
    static CandidateAlgorithm algorithm = ECA_OVERLAP;
    static int kmerLength = 23;
    static double errorRate = 0.08;
    static unsigned int minOverlap = DEFAULT_MIN_OVERLAP;
}

// Short option string for getopt_long. A ':' after a letter means the option
// takes a value. Every short option below has a long twin declared with
// required_argument, so every letter -- including the final 'm' -- must be
// followed by a colon; without it "-m 30" would be read as a bare flag and
// "30" would be left behind as an extra positional argument.
static const char* shortopts = "p:o:t:b:d:k:e:m:";

// Values returned by getopt_long for long options that have no short form.
// Only OPT_HELP and OPT_VERSION are referenced by the table below.
enum { OPT_HELP = 1, OPT_VERSION, OPT_METRICS, OPT_DISCARD, OPT_LEARN };

// Long option table: { name, argument requirement, flag pointer, value }.
// The flag pointer is NULL everywhere, so getopt_long returns the value in the
// last column. The all-zero entry terminates the table.
static const struct option longopts[] = {
    { "prefix",        required_argument, NULL, 'p' },
    { "outfile",       required_argument, NULL, 'o' },
    { "threads",       required_argument, NULL, 't' },
    { "buffer",        required_argument, NULL, 'b' },
    { "sample-rate",   required_argument, NULL, 'd' },
    { "kmer-size",     required_argument, NULL, 'k' },
    { "error-rate",    required_argument, NULL, 'e' },
    { "min-overlap",   required_argument, NULL, 'm' },
    { "help",          no_argument,       NULL, OPT_HELP },
    { "version",       no_argument,       NULL, OPT_VERSION },
    { NULL, 0, NULL, 0 }
};

//
// Main
//
int candidateMain(int argc, char** argv)
{
    parseCandidateOptions(argc, argv);

    Mask::initReadsMask(opt::readsFileDim);
    std::cout << "Determining sequencing errors for " << opt::readsFile << "\n";
    // Load indices
    BWT* pBWT = new BWT(opt::prefix + BWT_EXT, opt::sampleRate);
    BWT* pRBWT = NULL;
    SampledSuffixArray* pSSA = NULL;

    if(opt::algorithm == ECA_OVERLAP)
        pSSA = new SampledSuffixArray(opt::prefix + SAI_EXT, SSA_FT_SAI);

    BWTIntervalCache* pIntervalCache = new BWTIntervalCache(opt::intervalCacheLength, pBWT);

    BWTIndexSet indexSet;
    indexSet.pBWT = pBWT;
    indexSet.pRBWT = pRBWT;
    indexSet.pSSA = pSSA;
    indexSet.pCache = pIntervalCache;

    // Open outfiles and start a timer
    std::ostream* pWriter = createWriter(opt::outFile);
    std::ostream* pDiscardWriter = (!opt::discardFile.empty() ? createWriter(opt::discardFile) : NULL);
    Timer* pTimer = new Timer(PROGRAM_IDENT);
    pBWT->printInfo();

    // Set the error correction parameters
    CandidateParameters ecParams;
    ecParams.indices = indexSet;
    ecParams.algorithm = opt::algorithm;

    ecParams.minOverlap = opt::minOverlap;
    ecParams.minIdentity = 1.0f - opt::errorRate;

    ecParams.kmerLength = opt::kmerLength;
    // Setup post-processor
    CandidatePostProcess postProcessor(pWriter); 

    if(opt::numThreads <= 1)
    {
        // Serial mode
        CandidateProcess processor(ecParams); 
        SequenceProcessFramework::processSequencesSerial<SequenceWorkItem,
                                                         CandidateResult, 
                                                         CandidateProcess, 
                                                         CandidatePostProcess>(opt::readsFile, &processor, &postProcessor);
    }
    else
    {
        // Parallel mode
        std::vector<CandidateProcess*> processorVector;
        for(int i = 0; i < opt::numThreads; ++i)
        {
            CandidateProcess* pProcessor = new CandidateProcess(ecParams);
            processorVector.push_back(pProcessor);
        }
        
        SequenceProcessFramework::processSequencesParallel<SequenceWorkItem,
                                                           CandidateResult, 
                                                           CandidateProcess, 
                                                           CandidatePostProcess>(opt::readsFile, processorVector, &postProcessor);

        for(int i = 0; i < opt::numThreads; ++i)
        {
            delete processorVector[i];
        }
    }

    delete pBWT;
    delete pIntervalCache;
    if(pRBWT != NULL)
        delete pRBWT;

    if(pSSA != NULL)
        delete pSSA;

    delete pTimer;
    
    delete pWriter;
    if(pDiscardWriter != NULL)
        delete pDiscardWriter;

    Mask::cleanReadsMask();
    if(opt::numThreads > 1)
        pthread_exit(NULL);

    return 0;
}

// 
// Handle command line arguments
//
// Parse the command line into the opt:: settings.
//
// argc, argv: the argument vector received by candidateMain. Exactly one
//             positional argument (the reads file) must remain after the
//             options.
//
// The function terminates the process instead of returning when
//   - --help or --version is given (exit status EXIT_SUCCESS), or
//   - an option is not recognised, the number of positional arguments is not
//     one, the thread count or the kmer length is not positive, or the error
//     rate is greater than 1 (exit status EXIT_FAILURE).
//
// On return opt::readsFile and opt::readsFileDim are set, opt::prefix and
// opt::outFile have fallen back to names derived from the reads file if they
// were not given, and opt::discardFile is empty.
void parseCandidateOptions(int argc, char** argv)
{
    // No command line option sets this flag, so the discard file stays disabled.
    bool bDiscardReads = false;
    bool die = false;

    // getopt_long returns an int. The result must be kept in an int: stored in
    // a plain char, the end-of-options value -1 can never compare equal on
    // platforms where char is unsigned, and the loop would not terminate.
    for (int c; (c = getopt_long(argc, argv, shortopts, longopts, NULL)) != -1;)
    {
        // Wrap the option value (or an empty string) in a stream so that each
        // case can extract it with the type of its target variable.
        std::istringstream arg(optarg != NULL ? optarg : "");
        switch (c)
        {
            case 'p': arg >> opt::prefix; break;
            case 'o': arg >> opt::outFile; break;
            case 't': arg >> opt::numThreads; break;
            case 'd': arg >> opt::sampleRate; break;
            case 'k': arg >> opt::kmerLength; break;
            case 'e': arg >> opt::errorRate; break;
            case 'm': arg >> opt::minOverlap; break;
            case '?': die = true; break;
            case OPT_HELP:
                std::cout << CANDIDATE_USAGE_MESSAGE;
                exit(EXIT_SUCCESS);
            case OPT_VERSION:
                std::cout << CANDIDATE_VERSION_MESSAGE;
                exit(EXIT_SUCCESS);
            // Any other accepted option (such as -b/--buffer) has no case here
            // and is ignored.
        }
    }

    // Exactly one positional argument is expected: the reads file.
    if (argc - optind < 1)
    {
        std::cerr << SUBPROGRAM ": missing arguments\n";
        die = true;
    }
    else if (argc - optind > 1)
    {
        std::cerr << SUBPROGRAM ": too many arguments\n";
        die = true;
    }

    if(opt::numThreads <= 0)
    {
        std::cerr << SUBPROGRAM ": invalid number of threads: " << opt::numThreads << "\n";
        die = true;
    }

    if(opt::kmerLength <= 0)
    {
        std::cerr << SUBPROGRAM ": invalid kmer length: " << opt::kmerLength << ", must be greater than zero\n";
        die = true;
    }

    // All problems found so far have been reported; show the usage and stop.
    if (die)
    {
        std::cout << "\n" << CANDIDATE_USAGE_MESSAGE;
        exit(EXIT_FAILURE);
    }

    // Validate parameters: a non-positive error rate is silently replaced by
    // 0.08, a rate above 1 is rejected.
    if(opt::errorRate <= 0)
        opt::errorRate = 0.08f;

    if(opt::errorRate > 1.0f)
    {
        std::cerr << "Invalid error-rate parameter: " << opt::errorRate << "\n";
        exit(EXIT_FAILURE);
    }

    // Parse the input filenames
    opt::readsFile = argv[optind++];
    opt::readsFileDim = opt::readsFile + ".dim";
    if(opt::prefix.empty())
    {
        opt::prefix = stripFilename(opt::readsFile);
    }

    // Output names default to the stripped reads file name plus a suffix.
    std::string out_prefix = stripFilename(opt::readsFile);
    if(opt::outFile.empty())
    {
        opt::outFile = out_prefix + ".cand";
    }

    if(bDiscardReads)
    {
        opt::discardFile = out_prefix + ".discard";
    }
    else
    {
        opt::discardFile.clear();
    }
}
