/* This code is MODIFIED from the original source distribution, originally by
   M. Collins (1999).  The original documentation is contained below.
   
   The modifications to this file are by Min-Yen Kan, and are also distributed 
   under GNU GPL license, see below or the GNU GPL License included with the 
   distribution.

   The modifications enable the parser to work as a daemon, see the distributed
   README-daemonCollins.html for details.
 */

/* This code is the statistical natural language parser described in

   M. Collins. 1999.  Head-Driven
   Statistical Models for Natural Language Parsing. PhD Dissertation,
   University of Pennsylvania.

   Copyright (C) 1999 Michael Collins

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/

#include <assert.h>
#include "prob_witheffhash.h"

/* Each kind of cached query has two constants:
     xEFFTYPE - tag written to byte 0 of the cache key, so that keys of
                different kinds never compare equal
     xEFFLEN  - length in bytes of the cache key (stored in key.klen)
   Bytes 1 and 2 of every key are set to zero by the lookup functions
   below. */

/* dependency probabilities (get_dependency_prob_witheffhash) */
#define DEFFTYPE 0
#define DEFFLEN 21

/* unary probabilities (get_unary_prob_witheffhash) */
#define UEFFTYPE 1
#define UEFFLEN 8

/* subcat probabilities (get_subcat_prob_witheffhash) */
#define SCEFFTYPE 2
#define SCEFFLEN 12

/* gap probabilities (get_gap_prob_witheffhash) */
#define GEFFTYPE 3
#define GEFFLEN 9

/* Array for caching prior probabilities, indexed [word][tag][ch] by
   get_prior_prob_witheffhash().  reset_prior_hashprobs() fills every
   entry with 10; an entry below 0.1 is treated as an already-computed
   value. */
double prior_hashprobs[PMAXWORDS][GMAXNTS][GMAXNTS];


/* Parameter names used throughout this file:

   wm/tm/cm = modifier word/tag/non-terminal
   p        = parent nt
   ch       = head-child nt
   wh/th    = head word/tag
   dist     = distance variable
   subcat   = subcat variable
   cc, punc = flags; make_alldep_string only looks at wcc/tcc when cc is
              non-zero and at wpunc/tpunc when punc is non-zero
              (presumably the coordinator and punctuation word/tag --
              TODO confirm against get_dependency_prob)
*/

/* Writes the arguments into `string` as the body of a dependency cache
   key; defined at the end of this file. */
void make_alldep_string(char *string,int wm,int tm,int cm,int wh,int th,int p,int ch,int dist,int subcat,int cc,int wcc,int tcc,int punc,int wpunc,int tpunc);

/* Cached front end to get_dependency_prob().

   The arguments are serialised into a DEFFLEN-byte key whose first byte
   is DEFFTYPE and whose next two bytes are zero.  The key is looked up
   in `effhash`:
     - on a hit (eff_findprob sets the flag non-zero) the stored value
       is returned;
     - otherwise the probability is computed with get_dependency_prob()
       using `hash`, stored in `effhash` under the same key, and
       returned.
*/
double get_dependency_prob_witheffhash(int wm,int tm,int cm,int wh,int th,int p,int ch,int dist,int subcat,int cc,int wcc,int tcc,int punc,int wpunc,int tpunc,hash_table *hash,effhash_table *effhash)
{
  key_type key;
  unsigned char buffer[1000];
  int flag;
  double prob;

  key.key = buffer;
  key.klen = DEFFLEN;

  /* make_alldep_string() is declared to take char *, while the key
     buffer holds unsigned bytes; convert explicitly instead of relying
     on an implicit (and diagnosed) pointer-sign conversion. */
  make_alldep_string((char *) buffer,wm,tm,cm,wh,th,p,ch,dist,subcat,cc,wcc,tcc,punc,wpunc,tpunc);

  /* Header bytes are written after serialisation so they always win. */
  buffer[0] = DEFFTYPE;
  buffer[1] = 0;
  buffer[2] = 0;

  prob = eff_findprob(&key,effhash,&flag);
  if(flag) return prob;

  /* Cache miss: compute once and remember the result. */
  prob = get_dependency_prob(wm,tm,cm,wh,th,p,ch,dist,subcat,cc,wcc,tcc,punc,wpunc,tpunc,hash);
  eff_addprob(&key,effhash,prob);
  return prob;
}


/* Cached front end to get_unary_prob().

   Builds a UEFFLEN-byte key from (ch, wh, th, p) with make_unary_string(),
   tags it with UEFFTYPE, and consults `effhash`.  A hit returns the
   stored value; a miss computes the probability from `hash`, stores it
   in `effhash`, and returns it.
*/
double get_unary_prob_witheffhash(int ch,int wh,int th,int p,hash_table *hash,effhash_table *effhash)
{
  unsigned char keybytes[1000];
  key_type lookup;
  int found;
  double result;

  /* Serialise the query first, then stamp the three header bytes. */
  make_unary_string(keybytes,ch,wh,th,p);
  keybytes[0] = UEFFTYPE;
  keybytes[1] = keybytes[2] = 0;

  lookup.key = keybytes;
  lookup.klen = UEFFLEN;

  result = eff_findprob(&lookup,effhash,&found);
  if(!found)
    {
      /* not cached yet: compute and remember */
      result = get_unary_prob(ch,wh,th,p,hash);
      eff_addprob(&lookup,effhash,result);
    }

  return result;
}

/* Cached front end to get_subcat_prob().

   dir selects the side: dir=0 means left, dir=1 means right.

   Builds an SCEFFLEN-byte key from the arguments with
   make_subcat_string(), tags it with SCEFFTYPE, and consults `effhash`.
   A hit returns the stored value; a miss computes the probability from
   `hash`, stores it in `effhash`, and returns it.
*/
double get_subcat_prob_witheffhash(int subcat,int ch,int wh,int th,int p,int dir,hash_table *hash,effhash_table *effhash)
{
  unsigned char keybytes[1000];
  key_type lookup;
  int found;
  double result;

  /* Serialise the query first, then stamp the three header bytes. */
  make_subcat_string(keybytes,subcat,ch,wh,th,p,dir);
  keybytes[0] = SCEFFTYPE;
  keybytes[1] = keybytes[2] = 0;

  lookup.key = keybytes;
  lookup.klen = SCEFFLEN;

  result = eff_findprob(&lookup,effhash,&found);
  if(!found)
    {
      /* not cached yet: compute and remember */
      result = get_subcat_prob(subcat,ch,wh,th,p,dir,hash);
      eff_addprob(&lookup,effhash,result);
    }

  return result;
}

/* Cached front end to get_gap_prob().

   Builds a GEFFLEN-byte key from (gap, ch, wh, th, p) with
   make_gap_string(), tags it with GEFFTYPE, and consults `effhash`.
   A hit returns the stored value; a miss computes the probability from
   `hash`, stores it in `effhash`, and returns it.
*/
double get_gap_prob_witheffhash(int gap,int ch,int wh,int th,int p,hash_table *hash,effhash_table *effhash)
{
  unsigned char keybytes[1000];
  key_type lookup;
  int found;
  double result;

  /* Serialise the query first, then stamp the three header bytes. */
  make_gap_string(keybytes,gap,ch,wh,th,p);
  keybytes[0] = GEFFTYPE;
  keybytes[1] = keybytes[2] = 0;

  lookup.key = keybytes;
  lookup.klen = GEFFLEN;

  result = eff_findprob(&lookup,effhash,&found);
  if(!found)
    {
      /* not cached yet: compute and remember */
      result = get_gap_prob(gap,ch,wh,th,p,hash);
      eff_addprob(&lookup,effhash,result);
    }

  return result;
}

/* Prior probability with memoisation in the prior_hashprobs array
   (no effhash_table is involved, despite the name).

   word, tag and ch select the cache entry prior_hashprobs[word][tag][ch];
   ch, wh and th are what is handed to get_prior_prob() on a miss.
   An entry below 0.1 is taken to be a previously stored result and is
   returned as is; anything else is recomputed and written back.
   No bounds checking is done on word, tag or ch.
*/
double get_prior_prob_witheffhash(int ch,int wh,int th,hash_table *hash,int word,int tag)
{
  double *slot = &prior_hashprobs[word][tag][ch];
  double fresh;

  if(*slot < 0.1)
    return *slot;

  fresh = get_prior_prob(ch,wh,th,hash);
  *slot = fresh;

  return fresh;
}

/* Invalidate the whole prior-probability cache by writing 10 into every
   entry of prior_hashprobs. */
void reset_prior_hashprobs()
{
  const double unset = 10;
  int word, tag, nt;

  for(word = 0; word < PMAXWORDS; word++)
    {
      for(tag = 0; tag < GMAXNTS; tag++)
        {
          nt = 0;
          while(nt < GMAXNTS)
            {
              prior_hashprobs[word][tag][nt] = unset;
              nt++;
            }
        }
    }
}

/* Serialise the arguments of a dependency-probability query into bytes
   3..20 of `string`, forming the body of a 21-byte cache key.  Bytes
   0..2 are not touched here; the caller fills them in.  `string` must
   have room for at least 21 bytes.

   Byte layout:
      3-4    wm      (via byte2_to_char)
      5      tm
      6      cm
      7      punc
      8      cc
      9      p
     10      ch
     11      dist
     12-14   subcat  (via byte3_to_char)
     15      th
     16-17   wh      (via byte2_to_char)
     18-19   wpunc if punc, else wcc if cc, else 0
     20      tpunc if punc, else tcc if cc, else 0

   Bytes 18..20 are a single shared slot.  Previously the punc branch
   ran unconditionally after the cc branch, so with cc set and punc
   clear its `else` zeroed the wcc/tcc bytes that had just been written;
   queries differing only in wcc/tcc then shared one key and returned
   each other's cached probability.  The slot is now chosen by a single
   chain, which leaves the punc case unchanged and keeps wcc/tcc when
   only cc is set.  The cc and punc flags at bytes 7 and 8 say which
   pair the slot holds.

   NOTE(review): if cc and punc can both be non-zero in one query,
   wcc/tcc still do not fit into the 21-byte key; supporting that would
   need a longer key (DEFFLEN) and is outside this function.
*/
void make_alldep_string(char *string,int wm,int tm,int cm,int wh,int th,int p,int ch,int dist,int subcat,int cc,int wcc,int tcc,int punc,int wpunc,int tpunc)
{
  byte2_to_char(&string[3],wm);

  string[5] = (char) tm;
  string[6] = (char) cm;
  string[7] = (char) punc;
  string[8] = (char) cc;

  string[9] = (char) p;
  string[10] = (char) ch;
  string[11] = (char) dist;
  byte3_to_char(&string[12],subcat);
  string[15] = (char) th;
  byte2_to_char(&string[16],wh);

  if(punc)
    {
      /* punc takes precedence, as it did before the fix */
      byte2_to_char(&string[18],wpunc);
      string[20] = (char) tpunc;
    }
  else if(cc)
    {
      byte2_to_char(&string[18],wcc);
      string[20] = (char) tcc;
    }
  else
    {
      string[18]=string[19]=string[20]=0;
    }
}
