/* This code is MODIFIED from the original source distribution, originally by
M. Collins (1999). The original documentation is contained below.
The modifications to this file are by Min-Yen Kan, and are also distributed
under GNU GPL license, see below or the GNU GPL License included with the
distribution.
The modifications enable the parser to work as a daemon, see the distributed
README-daemonCollins.html for details.
*/
/* This code is the statistical natural language parser described in
M. Collins. 1999. Head-Driven
Statistical Models for Natural Language Parsing. PhD Dissertation,
University of Pennsylvania.
Copyright (C) 1999 Michael Collins
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <assert.h>
#include "readevents.h"
#define U_CODE 3
#define D_CODE 2
#define F_CODE 6
#define G_CODE 4
void read_events_s(FILE *file,hash_table *hash);
void read_events_d(FILE *file,hash_table *hash);
void read_events_u(FILE *file,hash_table *hash);
void read_events_g(FILE *file,hash_table *hash);
int read_events_word(FILE *file);
int read_events_nt(FILE *file);
int read_events_word2(FILE *file);
int read_events_nt2(FILE *file);
void read_events(FILE *file,hash_table *hash,int max)
{
int m=0;
int code;
while(fscanf(file,"%d",&code)!=EOF&&(m<max||max==-1))
{
m++;
if(((double) (m/100000))==(((double) m)/100000.0))
fprintf(stderr,"Hash table: %d lines read\n",m);
if(code==F_CODE)
{
read_events_s(file,hash);
}
else if(code==D_CODE)
{
read_events_d(file,hash);
}
else if(code==U_CODE)
{
read_events_u(file,hash);
}
else if(code==G_CODE)
{
read_events_g(file,hash);
}
else assert(0);
}
}
void read_events_s(FILE *file,hash_table *hash)
{
int i,n;
char word[1000],tag[1000];
int wn,tn;
fscanf(file,"%d",&n);
for(i=0;i<n;i++)
{
fscanf(file,"%s %s",word,tag);
wn = find_word(word,&wordlex);
if(!(wn>=0))
{
assert(0);
}
tn = find_word(tag,&nt_lex);
if(!(tn>=0))
{
assert(0);
}
/*finally add counts for the word/tag pair to the hash table
add_tagword_entries2(wn,tn,hash);*/
add_tagword_counts(wn,tn,hash);
}
}
void read_events_d(FILE *file,hash_table *hash)
{
/* read in the following values:
wm/tm, wh/th are modifer word/tag, head word/tag
p,ch,cm are parent, head and modifier non-terminals
cc = 1 if coordination, 0 otherwise
punc = 1 if punctuation, 0 otherwise
wcc/tcc, wpunc/tpunc are coordinator word/tag, punctuation word/tag
subcat and dist are the subcat and distance strings
*/
int wm,tm,wh,th,p,ch,cm,cc,wcc,tcc,punc,wpunc,tpunc;
int subcat,dist;
wm = read_events_word(file);
tm = read_events_nt(file);
wh = read_events_word(file);
th = read_events_nt(file);
cm = read_events_nt(file);
p = read_events_nt(file);
ch = read_events_nt(file);
assert(fscanf(file,"%d",&subcat)!=EOF);
assert(fscanf(file,"%d",&dist)!=EOF);
assert(fscanf(file,"%d",&cc)!=EOF);
if(cc)
{
wcc = read_events_word(file);
tcc = read_events_nt(file);
}
assert(fscanf(file,"%d",&punc)!=EOF);
if(punc)
{
wpunc = read_events_word(file);
tpunc = read_events_nt(file);
}
if(cm == STOPNT) punc = 0;
add_dependency_counts(wm,tm,cm,
wh,th,
p,ch,
dist,subcat,
cc,wcc,tcc,
punc,wpunc,tpunc,
hash);
}
void read_events_u(FILE *file,hash_table *hash)
{
int wh,th,p,ch,lsubcat,rsubcat;
wh=read_events_word(file);
th=read_events_nt(file);
p=read_events_nt(file);
ch=read_events_nt(file);
assert(fscanf(file,"%d",&lsubcat)!=EOF);
assert(fscanf(file,"%d",&rsubcat)!=EOF);
/*now add the counts*/
add_unary_counts(ch,wh,th,p,hash);
add_subcat_counts(lsubcat,ch,wh,th,p,0,hash);
add_subcat_counts(rsubcat,ch,wh,th,p,1,hash);
/* u.p = p;
u.ch = ch;
u.wh = wh;
u.th = th;
add_entries_U(&u,hash,HUR);
add_entries_S(&u,hash,lsubcat,rsubcat);*/
}
void read_events_g(FILE *file,hash_table *hash)
{
int wh,th,p,ch,gap;
assert(fscanf(file,"%d",&gap)!=EOF);
wh=read_events_word(file);
th=read_events_nt(file);
p=read_events_nt(file);
ch=read_events_nt(file);
/*now add the counts*/
add_gap_counts(gap,ch,wh,th,p,hash);
}
int read_events_word(FILE *file)
{
char buffer[1000];
int w;
assert(fscanf(file,"%s",buffer)!=EOF);
if(strcmp(buffer,"#STOP#")==0)
w = STOPWORD;
else
w = find_word(buffer,&wordlex);
if(!(w>=0))
{
assert(0);
}
return w;
}
int read_events_nt(FILE *file)
{
char buffer[1000];
int w;
assert(fscanf(file,"%s",buffer)!=EOF);
if(strcmp(buffer,"#STOP#")==0)
w = STOPNT;
else
w = find_word(buffer,&nt_lex);
if(!(w>=0))
{
assert(0);
}
assert(w>=0);
return w;
}
int read_events_word2(FILE *file)
{
char buffer[1000];
assert(fscanf(file,"%s",buffer)!=EOF);
return 0;
}
int read_events_nt2(FILE *file)
{
char buffer[1000];
assert(fscanf(file,"%s",buffer)!=EOF);
return 0;
}