/* This code is MODIFIED from the original source distribution, originally by
M. Collins (1999). The original documentation is contained below.
The modifications to this file are by Min-Yen Kan, and are also distributed
under GNU GPL license, see below or the GNU GPL License included with the
distribution.
The modifications enable the parser to work as a daemon, see the distributed
README-daemonCollins.html for details.
*/
/* This code is the statistical natural language parser described in
M. Collins. 1999. Head-Driven
Statistical Models for Natural Language Parsing. PhD Dissertation,
University of Pennsylvania.
Copyright (C) 1999 Michael Collins
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <assert.h>
#include "genprob.h"
#define BONTYPE 0 /*numerators sub-type*/
#define BODTYPE 1 /*denominators*/
#define BOUTYPE 2 /*unique outcomes count*/
#define PROBSMALL 0.0000000000000000001
void add_counts(unsigned char *event,int olen,int *backoffs,char type,hash_table *hash)
{
int i;
key_type key;
unsigned char buffer[1000];
int len; /*total length of the input string*/
int ns[100];
len = 3+olen+backoffs[1];
key.key = buffer;
for(i=0;i<len;i++)
buffer[i] = event[i];
/*first add the numerators*/
assert(backoffs[0]<100);
buffer[0] = type;
buffer[1] = BONTYPE;
for(i=1;i<=backoffs[0];i++)
{
buffer[2] = i;
key.klen = 3+olen+backoffs[i];
ns[i] = hash_add_element(&key,hash,1);
}
/*now the unique counts*/
key.key = buffer+olen;
buffer[olen] = type;
buffer[olen+1] = BOUTYPE;
for(i=1;i<=backoffs[0];i++)
{
if(ns[i] == 1)
{
buffer[olen+2] = i;
key.klen = 3+backoffs[i];
hash_add_element(&key,hash,1);
}
}
/*now the denominators*/
key.key = buffer+olen;
buffer[olen] = type;
buffer[olen+1] = BODTYPE;
for(i=1;i<=backoffs[0];i++)
{
buffer[olen+2] = i;
key.klen = 3+backoffs[i];
hash_add_element(&key,hash,1);
}
}
void add_counts_level(unsigned char *event,int olen,int *backoffs,int level,char type,hash_table *hash)
{
int i;
key_type key;
unsigned char buffer[1000];
int len; /*total length of the input string*/
int ns[100];
len = 3+olen+backoffs[1];
key.key = buffer;
for(i=0;i<len;i++)
buffer[i] = event[i];
/*first add the numerators*/
assert(backoffs[0]<100);
buffer[0] = type;
buffer[1] = BONTYPE;
for(i=1;i<=backoffs[0];i++)
{
buffer[2] = level;
key.klen = 3+olen+backoffs[i];
ns[i] = hash_add_element(&key,hash,1);
}
/*now the unique counts*/
key.key = buffer+olen;
buffer[olen] = type;
buffer[olen+1] = BOUTYPE;
for(i=1;i<=backoffs[0];i++)
{
if(ns[i] == 1)
{
buffer[olen+2] = level;
key.klen = 3+backoffs[i];
hash_add_element(&key,hash,1);
}
}
/*now the denominators*/
key.key = buffer+olen;
buffer[olen] = type;
buffer[olen+1] = BODTYPE;
for(i=1;i<=backoffs[0];i++)
{
buffer[olen+2] = level;
key.klen = 3+backoffs[i];
hash_add_element(&key,hash,1);
}
}
double get_prob(unsigned char *event,int olen,int *backoffs,char type,int w1,int w2,hash_table *hash)
{
int i;
key_type key;
unsigned char buffer[1000];
int len; /*total length of the input string*/
int ns[100],us[100],ds[100]; /*counts for numerators, denominators, uniques at
each level. Assumes that level 1 is most specific
*/
double prob;
int bo;
len = 3+olen+backoffs[1];
key.key = buffer;
for(i=0;i<len;i++)
buffer[i] = event[i];
/*first get the numerators*/
assert(backoffs[0]<100);
buffer[0] = type;
buffer[1] = BONTYPE;
for(i=1;i<=backoffs[0];i++)
{
buffer[2] = i;
key.klen = 3+olen+backoffs[i];
ns[i] = hash_find_element(&key,hash);
}
/*now the unique counts*/
key.key = buffer+olen;
buffer[olen] = type;
buffer[olen+1] = BOUTYPE;
for(i=1;i<=backoffs[0];i++)
{
buffer[olen+2] = i;
key.klen = 3+backoffs[i];
us[i] = hash_find_element(&key,hash);
}
/*now the denominators*/
key.key = buffer+olen;
buffer[olen] = type;
buffer[olen+1] = BODTYPE;
for(i=1;i<=backoffs[0];i++)
{
buffer[olen+2] = i;
key.klen = 3+backoffs[i];
ds[i]=hash_find_element(&key,hash);
}
if(ds[backoffs[0]] <= 0.1)
return PROBSMALL;
assert( ds[backoffs[0]] > 0.1);
assert( us[backoffs[0]] > 0.1);
prob = PROBSMALL;
for(i=backoffs[0];i>=1;i--)
{
bo = w1 + w2*us[i];
if(ds[i] > 0.1)
prob = (bo*prob + ns[i]) /( (double) (bo + ds[i]));
}
return prob;
}