/* This code is MODIFIED from the original source distribution, originally by
   M. Collins (1999).  The original documentation is contained below.
   
   The modifications to this file are by Min-Yen Kan, and are also distributed 
   under GNU GPL license, see below or the GNU GPL License included with the 
   distribution.

   The modifications enable the parser to work as a daemon, see the distributed
   README-daemonCollins.html for details.
 */

/* This code is the statistical natural language parser described in

   M. Collins. 1999.  Head-Driven
   Statistical Models for Natural Language Parsing. PhD Dissertation,
   University of Pennsylvania.

   Copyright (C) 1999 Michael Collins

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/

#ifndef SENTENCE_H
#define SENTENCE_H

#include <stdio.h>
#include <string.h>
#include <math.h>
#include <stdlib.h>
#include <time.h>

#include "grammar.h"

/* max number of words in a sentence; also the capacity of every per-word
   array in sentence_type */
#define PMAXWORDS 512


/* NOTE(review): NT_LRB / NT_RRB are not used in this header. Presumably they
   identify left / right round-bracket tokens -- confirm against the code
   that uses them. */
#define NT_LRB 1
#define NT_RRB 2

/* the data structure for a sentence, the input to the parsing algorithm

   words = words in the sentence
   tags  = tags in the sentence
   nws   = number of words in the sentence

   wordnos/tagnos = input to the chart parser. Details are:

   1) wordnos/tagnos are integers corresponding to the words/tags,
      as found in the dictionaries wordlex and nt_lex

   2) wordnos/tagnos is sentence *without* punctuation

   3) nws_np is the number of wordnos/tagnos (stands for nws_no-punctuation)

   4) wordpos[i] maps wordnos/tagnos to words/tags i.e. wordpos[i] is the 
      position of the i'th word in wordnos in the words array

   5) commaats[i] == 1 if wordnos[i] has a word tagged as "," or ":" following
      it (in the original words array). So this is the only piece of 
      information that the parsing algorithm uses about punctuation

   6) commaats2[i] == 1 means that the comma at position i is used to
      discard some constituents from the chart (see section 2.7 of Collins 96) 
      Usually, commaats2[i] == commaats[i]. In this implementation there is
      one exception: if the comma occurs between parentheses it is not used
      to prune constituents from the chart

 */

/* One input sentence: the original tokens (words/tags) plus the
   punctuation-free integer view (wordnos/tagnos) given to the chart parser. */
typedef struct {  
  /* integer ids of the words, punctuation removed (ids as found in the
     word dictionary) */
  int wordnos[PMAXWORDS];
  /* integer ids of the tags, punctuation removed (ids as found in the
     tag/non-terminal dictionary) */
  int tagnos[PMAXWORDS];
  /* wordpos[i] = position in words[] of the i'th entry of wordnos[] */
  int wordpos[PMAXWORDS];
  /* number of entries in wordnos/tagnos ("no punctuation" count) */
  int nws_np;

  /* the words of the sentence, punctuation included */
  char *words[PMAXWORDS];
  /* the tags of the sentence, one per entry of words[] */
  char *tags[PMAXWORDS];
  /* number of entries in words/tags */
  int nws;

  /* commaats[i] == 1 if wordnos[i] is followed, in the original words
     array, by a word tagged "," or ":" */
  char commaats[PMAXWORDS];

  /* NOTE(review): commatags / commawords are not described anywhere in this
     header. Presumably the tag id and word id of the punctuation token
     recorded in commaats -- confirm against the implementation. */
  int commatags[PMAXWORDS];
  int commawords[PMAXWORDS];

  /* commaats2[i] == 1 if the comma at position i is used to discard
     constituents from the chart; usually equal to commaats[i], except that
     a comma between parentheses is not used for pruning */
  char commaats2[PMAXWORDS];

} sentence_type;

/* Reads one sentence from `file` into `*sentence`. Assumes the format is

   n word_1 tag_1 ... word_n tag_n

   i.e. the word count followed by n word/tag pairs, e.g.

   18 Pierre NNP Vinken NNP , , 61 CD years NNS old JJ , , will MD join VB 
   the DT board NN as IN a DT nonexecutive JJ director NN Nov. NNP 29 CD . .

   NOTE(review): the meaning of the int return value is not visible in this
   header -- confirm against the implementation.
*/

int read_sentence(FILE *file,sentence_type *sentence);

/* Reads up to `max` sentences from `file` into the array starting at `s`.
   Returns the number of sentences which have been read.
   */

int read_sentences(FILE *file,sentence_type *s,int max);

/* Used when the parser fails to return a parse. Prints the sentence `s` as
   a flat bracketing:

   (TOP word_1/tag_1 ... word_n/tag_n )

   NOTE(review): the output stream is not visible in this header -- confirm
   against the implementation. */

void print_noparse(sentence_type *s);

#endif