sg.edu.nus.comp.nlp.ims.implement
Class CTrainModel

java.lang.Object
  extended by sg.edu.nus.comp.nlp.ims.implement.CTrainModel

public class CTrainModel
extends java.lang.Object

The main interface for training models.

Author:
zhongzhi

Field Summary
protected  java.lang.String m_CorpusName
           
protected  java.util.Hashtable<java.lang.String,java.lang.Integer> m_CutOffs
           
protected  java.lang.String m_Delimiter
           
protected  java.lang.String m_FeatureExtractorName
           
protected  java.lang.String m_InstanceExtractorName
           
protected  boolean m_Lemmatized
           
protected  java.util.ArrayList<java.lang.Object> m_Models
           
protected  boolean m_POSTagged
           
protected  boolean m_Split
           
protected  boolean m_Tokenized
           
protected  IModelTrainer m_Trainer
           
protected  IModelWriter m_Writer
           
 
Constructor Summary
CTrainModel()
          default constructor
 
Method Summary
 void clear()
          clear the generated models
 java.util.ArrayList<java.lang.Object> getModels()
          get models
static void main(java.lang.String[] p_Args)
           
 void setCorpusClassName(java.lang.String p_Name)
          set the corpus class name
 void setCutOff(java.lang.String p_Key, int p_Value)
          set the cut-off value for the given key
 void setDelimiter(java.lang.String p_Delimiter)
          set the delimiter
 void setFeatureExtractorName(java.lang.String p_Name)
          set the feature extractor name
 void setInstanceExtractorName(java.lang.String p_Name)
          set the instance extractor name
 void setLemmatized(boolean p_Lemmatized)
          whether the lemma info is provided
 void setModelTrainer(IModelTrainer p_ModelTrainer)
          set model trainer
 void setModelWriter(IModelWriter p_ModelWriter)
          set model writer
 void setPOSTagged(boolean p_POSTagged)
          whether the POS (part-of-speech) info is provided
 void setSplit(boolean p_Split)
          whether the input is already split
 void setTokenized(boolean p_Tokenized)
          whether sentences are already tokenized
 void train(java.io.Reader p_XmlReader, java.io.BufferedReader p_KeyReader)
          train models from the given XML reader and key reader
 void train(java.lang.String p_XmlFile, java.lang.String p_KeyFile)
          train models from the given XML file and key file
 void write()
          write models to disk
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

m_Models

protected java.util.ArrayList<java.lang.Object> m_Models

m_Trainer

protected IModelTrainer m_Trainer

m_Writer

protected IModelWriter m_Writer

m_CorpusName

protected java.lang.String m_CorpusName

m_InstanceExtractorName

protected java.lang.String m_InstanceExtractorName

m_FeatureExtractorName

protected java.lang.String m_FeatureExtractorName

m_CutOffs

protected java.util.Hashtable<java.lang.String,java.lang.Integer> m_CutOffs

m_Delimiter

protected java.lang.String m_Delimiter

m_Split

protected boolean m_Split

m_Tokenized

protected boolean m_Tokenized

m_Lemmatized

protected boolean m_Lemmatized

m_POSTagged

protected boolean m_POSTagged
Constructor Detail

CTrainModel

public CTrainModel()
default constructor

Method Detail

train

public void train(java.lang.String p_XmlFile,
                  java.lang.String p_KeyFile)
           throws java.lang.Exception
train models from the given XML file and key file

Parameters:
p_XmlFile - training XML file
p_KeyFile - training key file
Throws:
java.lang.Exception - if an error occurs during training

train

public void train(java.io.Reader p_XmlReader,
                  java.io.BufferedReader p_KeyReader)
           throws java.lang.Exception
train models from the given XML reader and key reader

Parameters:
p_XmlReader - reader for the training XML file
p_KeyReader - reader for the training key file
Throws:
java.lang.Exception - if an error occurs during training

setSplit

public void setSplit(boolean p_Split)
whether the input is already split

Parameters:
p_Split - whether split

setTokenized

public void setTokenized(boolean p_Tokenized)
whether sentences are already tokenized

Parameters:
p_Tokenized - whether tokenized

setPOSTagged

public void setPOSTagged(boolean p_POSTagged)
whether the POS (part-of-speech) info is provided

Parameters:
p_POSTagged - whether pos tagged

setLemmatized

public void setLemmatized(boolean p_Lemmatized)
whether the lemma info is provided

Parameters:
p_Lemmatized - whether lemmatized

setDelimiter

public void setDelimiter(java.lang.String p_Delimiter)
set the delimiter

Parameters:
p_Delimiter - delimiter

setModelTrainer

public void setModelTrainer(IModelTrainer p_ModelTrainer)
set model trainer

Parameters:
p_ModelTrainer - model trainer

setModelWriter

public void setModelWriter(IModelWriter p_ModelWriter)
set model writer

Parameters:
p_ModelWriter - model writer

setCorpusClassName

public void setCorpusClassName(java.lang.String p_Name)
set the corpus class name

Parameters:
p_Name - corpus class name

setInstanceExtractorName

public void setInstanceExtractorName(java.lang.String p_Name)
set the instance extractor name

Parameters:
p_Name - instance extractor name

setFeatureExtractorName

public void setFeatureExtractorName(java.lang.String p_Name)
set the feature extractor name

Parameters:
p_Name - feature extractor name

setCutOff

public void setCutOff(java.lang.String p_Key,
                      int p_Value)
set the cut-off value for the given key

Parameters:
p_Key - key name
p_Value - value

getModels

public java.util.ArrayList<java.lang.Object> getModels()
get models

Returns:
models

clear

public void clear()
clear the generated models


write

public void write()
           throws java.io.IOException
write models to disk

Throws:
java.io.IOException - if an error occurs while saving the models

main

public static void main(java.lang.String[] p_Args)
Parameters:
p_Args - command-line arguments