public class WekaClassifierHadoopJob extends HadoopJob implements ClassifierProducer, CommandlineRunnable
COLT_JAR, DISTRIBUTED_WEKA_BASE_JAR, DISTRIBUTED_WEKA_HADOOP_JAR, JCOMMON_JAR, JFREECHART_JAR, LA4J_JAR, OPEN_CSV_JAR

| Constructor and Description |
|---|
WekaClassifierHadoopJob()
Constructor
|
| Modifier and Type | Method and Description |
|---|---|
java.lang.String |
classAttributeTipText()
Tip text for this property
|
java.lang.String |
createRandomizedDataChunksTipText()
Tip text for this property
|
java.lang.String |
getClassAttribute()
Get the name or index of the class attribute ("first" and "last" can also
be used)
|
Classifier |
getClassifier() |
java.lang.String |
getClassifierMapTaskOptions()
Get the options for the underlying map task
|
boolean |
getCreateRandomizedDataChunks()
Get whether to create randomly shuffled (and stratified if the class is
nominal) data chunks via a pre-processing pass/job.
|
java.lang.String |
getCSVMapTaskOptions()
Get the options to the header job
|
java.lang.String[] |
getJobOptionsOnly()
Get the options for this job only
|
java.lang.String |
getMinTrainingFraction()
Get the minimum training fraction.
|
java.lang.String |
getModelFileName()
Get the name only for the model file
|
java.lang.String |
getNumInstancesPerRandomizedDataChunk()
Get the number of instances that each randomly shuffled data chunk should
have.
|
int |
getNumIterations()
Get the number of iterations (passes over the data) to run in the model
building phase.
|
java.lang.String |
getNumRandomizedDataChunks()
Get the number of randomly shuffled data chunks to create.
|
java.lang.String[] |
getOptions() |
java.lang.String |
getPathToPreconstructedFilter()
Get the path to a pre-constructed filter to use to pre-process the data
entering each map.
|
Instances |
getTrainingHeader() |
java.lang.String |
globalInfo()
Help information
|
java.util.Enumeration<Option> |
listOptions() |
static void |
main(java.lang.String[] args) |
java.lang.String |
minTrainingFractionTipText()
Tip text for this property
|
java.lang.String |
modelFileNameTipText()
Tip text for this property
|
java.lang.String |
numInstancesPerRandomizedDataChunkTipText()
Tip text for this property
|
java.lang.String |
numIterationsTipText()
Tip text for this property
|
java.lang.String |
numRandomizedDataChunksTipText()
Tip text for this property
|
java.lang.String |
pathToPreconstructedFilterTipText()
Tip text for this property
|
void |
run(java.lang.Object toRun,
java.lang.String[] args) |
boolean |
runJob() |
void |
setClassAttribute(java.lang.String c)
Set the name or index of the class attribute ("first" and "last" can also
be used)
|
void |
setClassifierMapTaskOptions(java.lang.String opts)
Set the options for the underlying map task
|
void |
setCreateRandomizedDataChunks(boolean s)
Set whether to create randomly shuffled (and stratified if the class is
nominal) data chunks via a pre-processing pass/job.
|
void |
setCSVMapTaskOptions(java.lang.String opts)
Set the options to the header job
|
void |
setMinTrainingFraction(java.lang.String frac)
Set the minimum training fraction.
|
void |
setModelFileName(java.lang.String m)
Set the name only for the model file
|
void |
setNumInstancesPerRandomizedDataChunk(java.lang.String insts)
Set the number of instances that each randomly shuffled data chunk should
have.
|
void |
setNumIterations(int i)
Set the number of iterations (passes over the data) to run in the model
building phase.
|
void |
setNumRandomizedDataChunks(java.lang.String chunks)
Set the number of randomly shuffled data chunks to create.
|
void |
setOptions(java.lang.String[] options) |
void |
setPathToPreconstructedFilter(java.lang.String path)
Set the path to a pre-constructed filter to use to pre-process the data
entering each map.
|
void |
stopJob() |
additionalWekaPackagesTipText, cleanOutputDirectory, deubgTipText, getAdditionalWekaPackages, getBaseOptionsOnly, getDebug, getLoggingInterval, getMapNumber, getMapReduceJobConfig, getMapReduceNumber, getPathToWekaJar, getReduceNumber, loggingIntervalTipText, pathToWekaJarTipText, setAdditionalWekaPackages, setDebug, setLoggingInterval, setMapReduceJobConfig, setPathToWekaJar

public java.lang.String globalInfo()
public java.lang.String modelFileNameTipText()
public void setModelFileName(java.lang.String m)
m - the name only (not full path) that the model should be saved to
public java.lang.String getModelFileName()
public java.lang.String numIterationsTipText()
public void setNumIterations(int i)
i - the number of iterations to run
public int getNumIterations()
public java.lang.String classAttributeTipText()
public void setClassAttribute(java.lang.String c)
c - the name or index of the class attribute
public java.lang.String getClassAttribute()
public void setCSVMapTaskOptions(java.lang.String opts)
opts - options to the header job
public java.lang.String getCSVMapTaskOptions()
public void setClassifierMapTaskOptions(java.lang.String opts)
opts - the options for the underlying map task
public java.lang.String getClassifierMapTaskOptions()
public java.lang.String minTrainingFractionTipText()
public void setMinTrainingFraction(java.lang.String frac)
frac - the fraction of training instances below which a model should
be discarded from the aggregation
public java.lang.String getMinTrainingFraction()
public java.lang.String pathToPreconstructedFilterTipText()
public void setPathToPreconstructedFilter(java.lang.String path)
path - the path to a pre-constructed filter to use
public java.lang.String getPathToPreconstructedFilter()
public java.lang.String createRandomizedDataChunksTipText()
public void setCreateRandomizedDataChunks(boolean s)
s - true if randomly shuffled data chunks are to be created for input
public boolean getCreateRandomizedDataChunks()
public java.lang.String numRandomizedDataChunksTipText()
public void setNumRandomizedDataChunks(java.lang.String chunks)
chunks - the number of chunks to create.
public java.lang.String getNumRandomizedDataChunks()
public java.lang.String numInstancesPerRandomizedDataChunkTipText()
public void setNumInstancesPerRandomizedDataChunk(java.lang.String insts)
insts - the number of instances that each randomly shuffled data chunk
should contain
public java.lang.String getNumInstancesPerRandomizedDataChunk()
public java.util.Enumeration<Option> listOptions()
listOptions in interface OptionHandler
listOptions in class HadoopJob
public void setOptions(java.lang.String[] options)
throws java.lang.Exception
setOptions in interface OptionHandler
setOptions in class HadoopJob
Throws: java.lang.Exception
public java.lang.String[] getOptions()
getOptions in interface OptionHandler
getOptions in class HadoopJob
public java.lang.String[] getJobOptionsOnly()
public boolean runJob()
throws weka.distributed.DistributedWekaException
runJob in class distributed.core.DistributedJob
Throws: weka.distributed.DistributedWekaException
public Classifier getClassifier()
getClassifier in interface ClassifierProducer
public void stopJob()
stopJob in class distributed.core.DistributedJob
public Instances getTrainingHeader()
getTrainingHeader in interface ClassifierProducer
public static void main(java.lang.String[] args)
public void run(java.lang.Object toRun,
java.lang.String[] args)
throws java.lang.IllegalArgumentException
run in interface CommandlineRunnable
Throws: java.lang.IllegalArgumentException