/* This code is MODIFIED from the original source distribution, originally by
M. Collins (1999). The original documentation is contained below.
The modifications to this file are by Min-Yen Kan, and are also distributed
under GNU GPL license, see below or the GNU GPL License included with the
distribution.
The modifications enable the parser to work as a daemon, see the distributed
README-daemonCollins.html for details.
*/
/* This code is the statistical natural language parser described in
M. Collins. 1999. Head-Driven
Statistical Models for Natural Language Parsing. PhD Dissertation,
University of Pennsylvania.
Copyright (C) 1999 Michael Collins
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <stdlib.h>
#include <time.h>
#include <assert.h>
#include "lexicon.h"
#include "grammar.h"
#include "mymalloc.h"
#include "mymalloc_char.h"
#include "hash.h"
#include "unistd.h"
#include "prob.h"
#include "readevents.h"
#include "sentence.h"
#include "chart.h"
/* Forward declarations for the two helpers defined after main(). */
void status (char *msg);
void process ();
/* Index into argv; main() advances it as it consumes command-line arguments. */
int i;
/* Result of the most recent read_sentence() call; process() stops on 0. */
int numsentences;
/* Stream process() reads sentences from (opened on lockfile). */
FILE *in;
/* Stream opened on outputfile; process() writes the "TIME" lines to it. */
FILE *out;
/* Counts file opened by main() and handed to read_events(). */
FILE *counts;
/* Sentence buffer allocated in process() for read_sentence()/parse_sentence(). */
sentence_type *sentencePtr;
/* Path arguments and derived paths; each buffer holds at most 999 characters. */
char grammar[1000];     /* grammar path passed to read_grammar() */
char inputfile[1000];   /* file that process() polls for */
char outputfile[1000];  /* inputfile with ".out" appended */
char statusfile[1000];  /* file rewritten by status() in daemon mode */
char lockfile[1000];    /* inputfile with ".lock" appended */
char cmdBuf[1000];      /* scratch buffer for assembling command strings */
char countsfile[1000];  /* path opened as `counts` */
/* Not referenced in the visible part of this file. */
float temp;
/* Value passed to set_treebankoutputflag(); main() sets it to 0. */
int npflag;
/* 1 when started with -d, otherwise 0; status() only writes when it is set. */
int mydaemon;
/* Seconds passed to sleep() between polls for the input file. */
int sleep_interval = 2;
/* Timestamps taken just before (g_time) and just after (s_time) parse_sentence(). */
time_t g_time;
time_t s_time;
/*
 * Program entry point.
 *
 * Usage:
 *   parser.out -d countsfile grammarfile status_file polled_inputfile
 *   parser.out countsfile grammarfile inputfile
 *
 * Reads the command line, loads the grammar and the event counts, and then
 * calls process() in an endless loop. With -d the loop runs in a forked
 * child and the foreground (parent) process exits once the fork succeeded.
 *
 * Returns 0 after printing the usage text, EXIT_FAILURE when the counts
 * file cannot be opened; otherwise it does not return.
 */
int main(int argc, char *argv[])
{
    /* Look at argv[1] only when it exists. */
    int daemon_requested = (argc > 1 && strcmp(argv[1], "-d") == 0);

    /*
     * "-d" consumes two extra arguments (the flag itself and the status
     * file), so it is only valid with argc == 6. Accepting it with
     * argc == 4 would walk past the end of argv below.
     */
    if ((argc != 4 && argc != 6) || (daemon_requested && argc != 6)) {
        fprintf(stderr, "ERROR in command line, usage:\n");
        fprintf(stderr, " parser.out -d countsfile grammarfile status_file polled_inputfile\n");
        fprintf(stderr, " parser.out countsfile grammarfile inputfile\n");
        /* Exit status left at 0, as before, so wrapper scripts see no change. */
        return 0;
    }

    i = 1;
    if (daemon_requested) {
        printf("will run as daemon\n");
        mydaemon = 1;
        i++;
    } else {
        printf("running normally\n");
        mydaemon = 0;
    }

    /* All path buffers are char[1000]; the width keeps sscanf inside them. */
    sscanf(argv[i], "%999s", countsfile);
    counts = fopen(countsfile, "r");
    if (counts == NULL) {
        /* The stream is handed straight to read_events(); fail early instead. */
        perror(countsfile);
        return EXIT_FAILURE;
    }

    i++;
    sscanf(argv[i], "%999s", grammar);

    BEAMPROB = log(10000);
    PUNC_FLAG = 1;
    DISTAFLAG = 1;
    DISTVFLAG = 1;
    npflag = 0;
    assert(npflag == 0 || npflag == 1);
    set_treebankoutputflag(npflag);

    if (mydaemon) {
        i++;
        sscanf(argv[i], "%999s", statusfile);
        status("initializing");
    }

    mymalloc_init();
    mymalloc_char_init();
    hash_make_table(8000007, &new_hash);
    effhash_make_table(1000003, &eff_hash);
    read_grammar(grammar);
    read_events(counts, &new_hash, -1);

    i++;
    sscanf(argv[i], "%999s", inputfile);
    printf("ready! -- if daemon mode, this parent process should exit.\n");

    if (mydaemon) {
        pid_t pid;

        /*
         * Flush before forking: otherwise text still sitting in the stdio
         * buffer is inherited by the child and printed a second time.
         */
        fflush(stdout);

        pid = fork();
        if (pid < 0) {
            /* No child was created; report it instead of exiting "successfully". */
            perror("fork");
            exit(EXIT_FAILURE);
        }
        if (pid > 0) {
            /* "foreground" process exits */
            exit(0);
        }
        /* pid == 0: the child carries on into the polling loop. */
    }

    /* Daemon child and normal mode both poll for work forever. */
    while (1) {
        process();
    }
}
/*
 * Handle one input file from start to finish.
 *
 * Polls every sleep_interval seconds until inputfile can be opened, claims
 * it by renaming it to "<inputfile>.lock", and then parses the sentences in
 * the lock file one at a time. After each parsed sentence a "TIME <seconds>"
 * line is written to "<inputfile>.out". Progress is reported via status().
 *
 * Returns when the lock file is exhausted, when memory runs out, or when
 * the output file cannot be created (the lock file is left in place in that
 * case). Terminates the program if the derived file names do not fit their
 * buffers.
 */
void process () {
    int interval;
    int total;
    int outLen;
    int lockLen;
    char statusBuf[1000];

    in = NULL;
    status ("checking for input file");

    /* The derived names do not change while waiting, so build them once. */
    outLen = snprintf (outputfile, sizeof outputfile, "%s.out", inputfile);
    lockLen = snprintf (lockfile, sizeof lockfile, "%s.lock", inputfile);
    if (outLen < 0 || (size_t) outLen >= sizeof outputfile ||
        lockLen < 0 || (size_t) lockLen >= sizeof lockfile) {
        /* Permanent misconfiguration: retrying cannot help. */
        status ("input file name too long");
        fprintf (stderr, "input file name too long: %s\n", inputfile);
        exit (EXIT_FAILURE);
    }

    while (in == NULL) {
        /* check for presence of file */
        in = fopen (inputfile, "r");
        while (in == NULL) {
            /* file isn't around or problem; assume it isn't around */
            status ("snoring");
            sleep (sleep_interval);
            in = fopen (inputfile, "r");
        }
        fclose (in);
        in = NULL;

        /* woke up, got work to do... */
        status ("securing file");

        /*
         * It is around: lock it by moving it aside and reopening it.
         * Both names live in the same directory, so rename() does the
         * move atomically and without handing the path to a shell.
         */
        if (rename (inputfile, lockfile) != 0) {
            /* Could not claim it; pause so a lasting failure does not spin. */
            sleep (sleep_interval);
            continue;
        }
        in = fopen (lockfile, "r");
        if (in == NULL) {
            sleep (sleep_interval);
        }
    }

    /* Open the output only once the lock is held, so no stream is leaked on retry. */
    out = fopen (outputfile, "w");
    if (out == NULL) {
        status ("cannot open output file");
        fclose (in);
        in = NULL;
        return;
    }

    /* got the lock */
    status ("got lock -- starting processing");
    interval = 0;
    total = 0;
    while (1) {
        /* Refresh the status file roughly every ten sentences. */
        if (interval > 10) {
            interval = 0;
            snprintf (statusBuf, sizeof statusBuf,
                      "processing, done with %d sentence(s)", total);
            status (statusBuf);
        }

        sentencePtr = malloc (sizeof *sentencePtr);
        if (sentencePtr == NULL) {
            status ("out of memory");
            break;
        }

        numsentences = read_sentence (in, sentencePtr);
        if (numsentences == 0) {
            /* Nothing more to read: release the buffer before leaving. */
            free (sentencePtr);
            sentencePtr = NULL;
            status ("finished processing");
            break;
        }

        time (&g_time);
        pthresh = -5000000;
        parse_sentence (sentencePtr);
        time (&s_time);
        fprintf (out, "TIME %d\n", (int) (s_time - g_time));

        free (sentencePtr);
        sentencePtr = NULL;
        interval = interval + numsentences;
        total = total + numsentences;
    }
    fclose (in);
    fclose (out);
}
/*
 * Record the parser's current state.
 *
 * In daemon mode, statusfile is overwritten with one line of the form
 * "parser: <msg> - <local time>"; asctime() supplies the trailing newline.
 * Outside daemon mode, or when the status file cannot be opened, nothing
 * is written.
 */
void status(char *msg) {
    time_t now;
    struct tm *local;
    FILE *fp;

    time(&now);
    local = localtime(&now);

    if (!mydaemon) {
        return;
    }

    fp = fopen(statusfile, "w");
    if (fp == NULL) {
        return;
    }

    fprintf(fp, "%s: %s - %s", "parser", msg, asctime(local));
    fclose(fp);
}