//-----------------------------------------------
// Copyright 2016 Guangxi University
// Written by Liang Zhao(S080011@e.ntu.edu.sg)
// Released under the GPL
//-----------------------------------------------
//
// correct - Correct sequencing errors in reads 
//
#include <unistd.h>
#include <iostream>
#include <fstream>
#include <sstream>
#include <iterator>
#include "correct.h"
#include "ErrorCorrect.h"
#include "Util.h"

//
// Getopt
//
#define SUBPROGRAM "correct" // name of this sub-command, spliced into every message below

// Text printed for --version.
static const char *CORRECT_VERSION_MESSAGE =
SUBPROGRAM " Version " PACKAGE_VERSION "\n";

// Text printed for --help and after any command line error.
// Keep it in sync with shortopts/longopts and with the defaults in namespace opt.
static const char *CORRECT_USAGE_MESSAGE =
"Usage: " PACKAGE_NAME " " SUBPROGRAM " [OPTION] ... READSFILE\n"
"\n"
"    --help                           display this help and exit\n"
"    --version                        display version information and exit\n"
"    -c, --candidate=STR              the candidate erroneous bases file\n"
"    -o, --outfile=FILE               write the corrected reads to FILE (default: <READSFILE prefix>.ec.fasta, or .ec.fastq with -f fastq)\n"
"    -f, --format=STR                 the output format, can be fasta or fastq (default: fasta)\n"
"    -b, --buffer=NUM                 the size of buffer used to correct errors (default: 2147483648, i.e., 2G)\n"
"    -t, --threads=NUM                use NUM threads for the computation (default: #cores-1)\n"
"    -r, --log-ratio=NUM              the log ratio threshold to determine erroneous base (default: -1.2)\n"
"    -e, --errRate=NUM                the estimated error rate (default: 0.01)\n"
"    -m, --maxFreq=NUM                the maximum frequency of prospective erroneous base (default: 3)\n"
"\nReport bugs to " PACKAGE_BUGREPORT "\n\n";

// Identifier of the form "<package>::correct".
static const char* PROGRAM_IDENT =
PACKAGE_NAME "::" SUBPROGRAM;

namespace opt
{
    // File names.
    static std::string readsFile;              // positional READSFILE argument
    static std::string readsFileDim;           // readsFile + ".dim"
    static std::string candidateFile;          // -c/--candidate
    static std::string outFile;                // -o/--outfile
    static std::string outFormat = "fasta";    // -f/--format: "fasta" or "fastq"

    // Numeric settings, initialised to their built-in defaults.
    size_t bufferSize = static_cast<size_t>(1) << 31; // -b/--buffer: 2147483648 (2G)
    size_t numThreads = 0;                            // -t/--threads: 0 until set on the command line
    size_t maxFreq = 3;                               // -m/--maxFreq
    double errRate = 0.01;                            // -e/--errRate
    double logRatio = -1.2;                           // -r/--log-ratio
}

// Short options understood by getopt_long; a trailing ':' means the option
// takes an argument. Every letter here must have a matching case in
// parseCorrectOptions and a matching row in longopts ('m' used to be missing,
// so the documented "-m NUM" form was rejected).
static const char* shortopts = "c:o:f:b:t:r:e:m:";

// Return codes for the long-only options. They start at 1 so they cannot
// collide with the printable characters used for the short options.
enum { OPT_HELP = 1, OPT_VERSION };

// Long option table; each entry makes getopt_long return the value in the
// last column, so long and short spellings share one switch case.
static const struct option longopts[] = {
    { "candidate",     required_argument, NULL, 'c' },
    { "outfile",       required_argument, NULL, 'o' },
    { "format",        required_argument, NULL, 'f' },
    { "buffer",        required_argument, NULL, 'b' },
    { "threads",       required_argument, NULL, 't' },
    { "log-ratio",     required_argument, NULL, 'r' },
    { "errRate",       required_argument, NULL, 'e' },
    { "maxFreq",       required_argument, NULL, 'm' },
    { "help",          no_argument,       NULL, OPT_HELP },
    { "version",       no_argument,       NULL, OPT_VERSION },
    { NULL, 0, NULL, 0 }
};

//
// Main
//
int correctMain(int argc, char** argv)
{
    parseCorrectOptions(argc, argv);
    Corrector cr(opt::readsFile, 
                 opt::candidateFile, 
                 opt::bufferSize, 
                 opt::numThreads, 
                 opt::outFormat, 
                 opt::readsFileDim, 
                 opt::logRatio, 
                 opt::outFile, 
                 opt::errRate,
                 opt::maxFreq);
    cr.importReads();
    cr.correct();
    cr.postprocess();
    return 0;
}

//
// Handle command line arguments
//

// Thread count used when -t/--threads is not given: one less than the number
// of online processors, but never fewer than one.
static size_t defaultThreadCount()
{
#ifdef _SC_NPROCESSORS_ONLN
    const long cores = sysconf(_SC_NPROCESSORS_ONLN);
    if (cores > 1)
        return static_cast<size_t>(cores - 1);
#endif
    return 1;
}

// Convert the whole of `text` into `value`.
// Returns false and leaves `value` untouched when `text` is empty, is not a
// number, has trailing characters other than whitespace (e.g. "2G"), or -
// with allowNegative == false - starts with a minus sign (stream extraction
// into an unsigned type would otherwise wrap it around silently).
template <typename T>
static bool parseNumericArg(const char* text, bool allowNegative, T& value)
{
    const std::string raw(text != NULL ? text : "");
    const std::string::size_type first = raw.find_first_not_of(" \t");
    if (!allowNegative && first != std::string::npos && raw[first] == '-')
        return false;

    std::istringstream in(raw);
    T parsed = T();
    if (!(in >> parsed))
        return false;

    char extra;
    if (in >> extra)
        return false;

    value = parsed;
    return true;
}

// Parse argv into the opt:: settings and validate them.
//
// argc, argv: the sub-command's argument vector as expected by getopt_long.
//
// On --help / --version the corresponding message is printed and the process
// exits with EXIT_SUCCESS. On any invalid option, value or argument count a
// diagnostic goes to stderr, the usage text is printed and the process exits
// with EXIT_FAILURE. On return opt::readsFile, opt::readsFileDim and
// opt::outFile are set and all numeric settings are within their valid range.
void parseCorrectOptions(int argc, char** argv)
{
    bool die = false;
    bool threadsGiven = false;

    // getopt_long returns an int; storing it in a char would make the
    // comparison with -1 always false on platforms where char is unsigned.
    int c;
    while ((c = getopt_long(argc, argv, shortopts, longopts, NULL)) != -1)
    {
        bool valueOk = true;
        switch (c)
        {
            // File names and the format are taken verbatim, like the
            // positional READSFILE, so names containing blanks survive.
            case 'c': opt::candidateFile = optarg; break;
            case 'o': opt::outFile = optarg; break;
            case 'f': opt::outFormat = optarg; break;
            case 'b': valueOk = parseNumericArg(optarg, false, opt::bufferSize); break;
            case 't':
                valueOk = parseNumericArg(optarg, false, opt::numThreads);
                threadsGiven = true;
                break;
            case 'r': valueOk = parseNumericArg(optarg, true, opt::logRatio); break;
            case 'e': valueOk = parseNumericArg(optarg, true, opt::errRate); break;
            case 'm': valueOk = parseNumericArg(optarg, false, opt::maxFreq); break;
            case OPT_HELP:
                std::cout << CORRECT_USAGE_MESSAGE;
                exit(EXIT_SUCCESS);
            case OPT_VERSION:
                std::cout << CORRECT_VERSION_MESSAGE;
                exit(EXIT_SUCCESS);
            default:
                // Unknown option or missing option argument; getopt_long has
                // already printed its own diagnostic.
                die = true;
                break;
        }

        if (!valueOk)
        {
            std::cerr << SUBPROGRAM ": invalid numeric value for -" << static_cast<char>(c)
                      << ": " << (optarg != NULL ? optarg : "") << "\n";
            die = true;
        }
    }

    if (argc - optind < 1) 
    {
        std::cerr << SUBPROGRAM ": missing arguments\n";
        die = true;
    } 
    else if (argc - optind > 1) 
    {
        std::cerr << SUBPROGRAM ": too many arguments\n";
        die = true;
    }

    // Apply the documented default (#cores-1) when -t was not supplied; an
    // explicit "-t 0" is still rejected below.
    if (!threadsGiven && opt::numThreads == 0)
        opt::numThreads = defaultThreadCount();

    if (opt::numThreads == 0)
    {
        std::cerr << SUBPROGRAM ": invalid number of threads: " << opt::numThreads << "\n";
        die = true;
    }

    if (opt::logRatio >= 0)
    {
        std::cerr << SUBPROGRAM ": invalid value of log-ratio: " << opt::logRatio << ", must be less than 0\n";
        die = true;
    }

    if (opt::bufferSize == 0)
    {
        std::cerr << SUBPROGRAM ": invalid size of buffer: " << opt::bufferSize << ", must be larger than 0\n";
        die = true;
    }

    if ((opt::errRate >= 1) || (opt::errRate < 0))
    {
        std::cerr << SUBPROGRAM ": invalid error rate: " << opt::errRate << ", must between 0 and 1\n";
        die = true;
    }

    if ((opt::maxFreq < 1) || (opt::maxFreq > 15))
    {
        std::cerr << SUBPROGRAM ": invalid maximum frequency of erroneous base: " << opt::maxFreq << ", must between 1 and 15\n";
        die = true;
    }

    if ((opt::outFormat != "fasta") && (opt::outFormat != "fastq"))
    {
        std::cerr << SUBPROGRAM ": invalid output file format: " << opt::outFormat << ", must be fasta or fastq\n";
        die = true;
    }

    if (die) 
    {
        std::cout << "\n" << CORRECT_USAGE_MESSAGE;
        exit(EXIT_FAILURE);
    }

    // Exactly one positional argument is left at this point.
    opt::readsFile = argv[optind++];
    opt::readsFileDim = opt::readsFile + ".dim";

    // Default output name: the stripped reads file name plus ".ec.<format>".
    // outFormat was validated above, so it is either "fasta" or "fastq".
    if (opt::outFile.empty())
        opt::outFile = stripFilename(opt::readsFile) + ".ec." + opt::outFormat;
}

