/******************************************************************************
** THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
** OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
**
** This code is distributed freely and may be used freely under the
** following conditions:
**   1. This notice may not be removed or altered.
**   2. Works derived from this code are not distributed for
**      commercial gain without explicit permission from NUS
******************************************************************************/
/*
** Implements (not very efficient) subroutines for CS5206 LEDA Assignment 1.
** Aug 30, 2008, Melvin Zhang
*/
#include <fcntl.h>
#include <errno.h>

#include <iostream>
using namespace std;

#include <LEDA/core/d_array.h>
#include <LEDA/core/list.h>
#include <LEDA/core/string.h>
using namespace leda;

/*
** Each call reads seglen characters from file fd into seg, ignoring newlines.
**
** If operation is successful, function returns the position where string is
** read from, and the file offset is only advanced a character (as opposed to
** advancing seglen characters).
**
** If operation is unsuccessful (e.g. less than seglen characters can be read),
** function returns 0.
*/
static int
_get_next_string(int fd, int seglen, char * seg)
{
    static int fpos = -1;
    int cnt;

    if (fpos == -1)
        fpos = lseek(fd, 0, SEEK_CUR);

    lseek (fd, fpos++, SEEK_SET);
    for (cnt = 0; cnt < seglen; )
    {
        if (read (fd, &(seg[cnt]), 1) != 1)
            return 0;
        switch (seg[cnt])
        {
        case '\n': if (cnt == 0) /* fpos is at a newline - advance a char */
                       fpos++;
                   break;
        default: cnt++;
                 break;
        }
    }
    seg[cnt] = '\0';

    return fpos;
}

/* - = - = - = - = - = - = - = - = - = - = - = - = - = - = - = - = - = - = - */

class _segment {
public:
    list<int> positions;  /* records the positions */
    int occurence;

    _segment() { occurence = 0; }
    ~_segment() {}

    void register_occurence (int position)
    {
        // Complete the implementation of this method
    }
};

/* - = - = - = - = - = - = - = - = - = - = - = - = - = - = - = - = - = - = - */

int
main(int argc, char ** argv)
{
    int fd, pos, seglen;
    char * fname, * seg;
    d_array<leda::string,_segment> DNA;

    if (argc < 3 || (seglen = atoi(argv[2])) < 0 )
    {
        cout << "Usage: " << argv[0] << " <file> <l>\n"
             << "Finds the most repeated patterns of length l in file"
             << endl;
        exit(1);
    }

    fname = strdup(argv[1]);
    seg = (char *) malloc (sizeof(char) * seglen + 1);
    if ( (fd = open(fname, O_RDONLY)) < 0 )
    {
        fprintf (stderr, "Unable to open file \"%s\": %s\n",
                 fname, strerror(errno));
        exit (1);
    }

    // Add your code here to complete the implementation

    // End your code here

    // DO NOT CHANGE THE CODES BELOW.
    // It prints out the answer in a standard format we use.

    int x;
    leda::string s;
    int ocr = 0;
    d_array<leda::string,_segment> RES;

    forall_defined(s, DNA)
        if (DNA[s].occurence > ocr)
        {
            RES.clear();
            RES[s] = DNA[s];
            ocr = DNA[s].occurence;
        }
        else if (DNA[s].occurence == ocr)
            RES[s] = DNA[s];

    cout << ocr << endl;
    forall_defined(s, RES)
    {
        cout << s << ":";
        forall (x, RES[s].positions) { cout << " " << x; }
        cout << endl;
    }

    return 0;
}


