/* This code is MODIFIED from the original source distribution, originally by
M. Collins (1999). The original documentation is contained below.
The modifications to this file are by Min-Yen Kan, and are also distributed
under GNU GPL license, see below or the GNU GPL License included with the
distribution.
The modifications enable the parser to work as a daemon, see the distributed
README-daemonCollins.html for details.
*/
/* This code is the statistical natural language parser described in
M. Collins. 1999. Head-Driven
Statistical Models for Natural Language Parsing. PhD Dissertation,
University of Pennsylvania.
Copyright (C) 1999 Michael Collins
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <assert.h>
#include "sentence.h"
extern FILE* out;
void convert_sentence(sentence_type *sentence);
int read_sentence(FILE *file,sentence_type *sentence)
{
int i;
char wbuf[100],tbuf[100];
int lw,lt;
if(fscanf(file,"%d",&sentence->nws)==EOF)
return 0;
assert(sentence->nws<PMAXWORDS);
for(i=0;i<sentence->nws;i++)
{
fscanf(file,"%s %s ",wbuf,tbuf);
sentence->words[i]=strdup(wbuf);
sentence->tags[i]=strdup(tbuf);
}
convert_sentence(sentence);
return 1;
}
int read_sentences(FILE *file,sentence_type *s,int max)
{
int num=0;
while(num<max&&read_sentence(file,&s[num])!=0)
num++;
return num;
}
void print_noparse(sentence_type *orig)
{
int i;
fprintf(out, "(TOP ");
for(i=0;i<orig->nws;i++)
fprintf(out, "%s/%s ",orig->words[i],orig->tags[i]);
fprintf(out, ")\n");
}
int ispunc(char *tag);
int iscomma(char *tag,char *word);
/* calculates wordnos, tagnos, nws_np, commaats and commaats2 from
words, tags, and nws (see sentence.h for the details of these
variables)
*/
void convert_sentence(sentence_type *sentence)
{
int i;
int n;
int w,t;
int lflag;
n = 0;
for(i=0;i<PMAXWORDS;i++)
sentence->commaats[i]=0;
for(i=0;i<sentence->nws;i++)
{
if(!ispunc(sentence->tags[i]))
{
w = find_word(sentence->words[i],&wordlex);
if(w==-1)
sentence->wordnos[n] = GUNKNOWN;
else
sentence->wordnos[n] = w;
sentence->wordpos[n] = i;
t = find_word(sentence->tags[i],&nt_lex);
if(!(t>=0))
{
assert(0);
}
sentence->tagnos[n] = t;
n++;
}
else
{
if(iscomma(sentence->tags[i],sentence->words[i])&&n>0)
{
sentence->commaats[n-1] =1;
w = find_word(sentence->words[i],&wordlex);
if(w==-1)
sentence->commawords[n-1] = GUNKNOWN;
else
sentence->commawords[n-1] = w;
t = find_word(sentence->tags[i],&nt_lex);
if(!(t>=0))
{
assert(0);
}
sentence->commatags[n-1] = t;
}
}
}
sentence->nws_np = n;
sentence->commaats[n-1] = 0;
lflag=0;
for(i=0;i<sentence->nws_np+1;i++)
{
if(sentence->tagnos[i]==NT_LRB) lflag=1;
if(sentence->tagnos[i]==NT_RRB) lflag=0;
if(lflag==0)
sentence->commaats2[i]=sentence->commaats[i];
else
sentence->commaats2[i]=0;
}
}
int ispunc(char *tag)
{
if(strcmp(tag,",")==0) return 1;
if(strcmp(tag,".")==0) return 1;
if(strcmp(tag,"``")==0) return 1;
if(strcmp(tag,"''")==0) return 1;
if(strcmp(tag,":")==0) return 1;
return 0;
}
int iscomma(char *tag,char *word)
{
if(strcmp(word,"...")==0) return 0;
if(strcmp(tag,",")==0) return 1;
if(strcmp(tag,":")==0) return 1;
return 0;
}