public class WekaClassifierHadoopJob extends HadoopJob implements ClassifierProducer, CommandlineRunnable
COLT_JAR, DISTRIBUTED_WEKA_BASE_JAR, DISTRIBUTED_WEKA_HADOOP_JAR, JCOMMON_JAR, JFREECHART_JAR, LA4J_JAR, OPEN_CSV_JAR, TDIGEST_JAR
Constructor and Description |
---|
WekaClassifierHadoopJob()
Constructor
|
Modifier and Type | Method and Description |
---|---|
java.lang.String |
classAttributeTipText()
Tip text for this property
|
java.lang.String |
createRandomizedDataChunksTipText()
Tip text for this property
|
java.lang.String |
getClassAttribute()
Get the name or index of the class attribute ("first" and "last" can also
be used)
|
Classifier |
getClassifier() |
java.lang.String |
getClassifierMapTaskOptions()
Get the options for the underlying map task
|
boolean |
getCreateRandomizedDataChunks()
Get whether to create randomly shuffled (and stratified if the class is
nominal) data chunks via a pre-processing pass/job.
|
java.lang.String |
getCSVMapTaskOptions()
Get the options to the header job
|
java.lang.String[] |
getJobOptionsOnly()
Get the options for this job only
|
java.lang.String |
getMinTrainingFraction()
Get the minimum training fraction.
|
java.lang.String |
getModelFileName()
Get the name only (not the full path) of the model file
|
java.lang.String |
getNumInstancesPerRandomizedDataChunk()
Get the number of instances that each randomly shuffled data chunk should
have.
|
int |
getNumIterations()
Get the number of iterations (passes over the data) to run in the model
building phase.
|
java.lang.String |
getNumRandomizedDataChunks()
Get the number of randomly shuffled data chunks to create.
|
java.lang.String[] |
getOptions() |
java.lang.String |
getPathToPreconstructedFilter()
Get the path to a pre-constructed filter to use to pre-process the data
entering each map.
|
Instances |
getTrainingHeader() |
java.lang.String |
globalInfo()
Help information
|
java.util.Enumeration<Option> |
listOptions() |
static void |
main(java.lang.String[] args) |
java.lang.String |
minTrainingFractionTipText()
Tip text for this property
|
java.lang.String |
modelFileNameTipText()
Tip text for this property
|
java.lang.String |
numInstancesPerRandomizedDataChunkTipText()
Tip text for this property
|
java.lang.String |
numIterationsTipText()
Tip text for this property
|
java.lang.String |
numRandomizedDataChunksTipText()
Tip text for this property
|
java.lang.String |
pathToPreconstructedFilterTipText()
Tip text for this property
|
void |
run(java.lang.Object toRun,
java.lang.String[] args) |
boolean |
runJob() |
void |
setClassAttribute(java.lang.String c)
Set the name or index of the class attribute ("first" and "last" can also
be used)
|
void |
setClassifierMapTaskOptions(java.lang.String opts)
Set the options for the underlying map task
|
void |
setCreateRandomizedDataChunks(boolean s)
Set whether to create randomly shuffled (and stratified if the class is
nominal) data chunks via a pre-processing pass/job.
|
void |
setCSVMapTaskOptions(java.lang.String opts)
Set the options to the header job
|
void |
setMinTrainingFraction(java.lang.String frac)
Set the minimum training fraction.
|
void |
setModelFileName(java.lang.String m)
Set the name only (not the full path) of the model file
|
void |
setNumInstancesPerRandomizedDataChunk(java.lang.String insts)
Set the number of instances that each randomly shuffled data chunk should
have.
|
void |
setNumIterations(int i)
Set the number of iterations (passes over the data) to run in the model
building phase.
|
void |
setNumRandomizedDataChunks(java.lang.String chunks)
Set the number of randomly shuffled data chunks to create.
|
void |
setOptions(java.lang.String[] options) |
void |
setPathToPreconstructedFilter(java.lang.String path)
Set the path to a pre-constructed filter to use to pre-process the data
entering each map.
|
void |
stopJob() |
additionalWekaPackagesTipText, cleanOutputDirectory, deubgTipText, getAdditionalWekaPackages, getBaseOptionsOnly, getDebug, getLoggingInterval, getMapNumber, getMapReduceJobConfig, getMapReduceNumber, getPathToWekaJar, getReduceNumber, loggingIntervalTipText, logMessage, pathToWekaJarTipText, setAdditionalWekaPackages, setDebug, setLoggingInterval, setMapReduceJobConfig, setPathToWekaJar
environmentSubstitute, getAdditionalWekaPackageNames, getJobName, getJobStatus, getLog, logMessage, logMessage, makeOptionsStr, objectRowToInstance, parseInstance, postExecution, preExecution, setEnvironment, setJobDescription, setJobName, setJobStatus, setLog, setStatusMessagePrefix, stackTraceToString, statusMessage
equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
postExecution, preExecution
public java.lang.String globalInfo()
public java.lang.String modelFileNameTipText()
public void setModelFileName(java.lang.String m)
m
- the name only (not full path) that the model should be saved to
public java.lang.String getModelFileName()
public java.lang.String numIterationsTipText()
public void setNumIterations(int i)
i
- the number of iterations to run
public int getNumIterations()
public java.lang.String classAttributeTipText()
public void setClassAttribute(java.lang.String c)
c
- the name or index of the class attribute
public java.lang.String getClassAttribute()
public void setCSVMapTaskOptions(java.lang.String opts)
opts
- options to the header job
public java.lang.String getCSVMapTaskOptions()
public void setClassifierMapTaskOptions(java.lang.String opts)
opts
- the options for the underlying map task
public java.lang.String getClassifierMapTaskOptions()
public java.lang.String minTrainingFractionTipText()
public void setMinTrainingFraction(java.lang.String frac)
frac
- the fraction of training instances below which a model should
be discarded from the aggregation
public java.lang.String getMinTrainingFraction()
public java.lang.String pathToPreconstructedFilterTipText()
public void setPathToPreconstructedFilter(java.lang.String path)
path
- the path to a pre-constructed filter to use
public java.lang.String getPathToPreconstructedFilter()
public java.lang.String createRandomizedDataChunksTipText()
public void setCreateRandomizedDataChunks(boolean s)
s
- true if randomly shuffled data chunks are to be created for input
public boolean getCreateRandomizedDataChunks()
public java.lang.String numRandomizedDataChunksTipText()
public void setNumRandomizedDataChunks(java.lang.String chunks)
chunks
- the number of chunks to create.
public java.lang.String getNumRandomizedDataChunks()
public java.lang.String numInstancesPerRandomizedDataChunkTipText()
public void setNumInstancesPerRandomizedDataChunk(java.lang.String insts)
insts
- the number of instances that each randomly shuffled data chunk
should contain
public java.lang.String getNumInstancesPerRandomizedDataChunk()
public java.util.Enumeration<Option> listOptions()
listOptions
in interface OptionHandler
listOptions
in class HadoopJob
public void setOptions(java.lang.String[] options) throws java.lang.Exception
setOptions
in interface OptionHandler
setOptions
in class HadoopJob
java.lang.Exception
public java.lang.String[] getOptions()
getOptions
in interface OptionHandler
getOptions
in class HadoopJob
public java.lang.String[] getJobOptionsOnly()
public boolean runJob() throws weka.distributed.DistributedWekaException
runJob
in class distributed.core.DistributedJob
weka.distributed.DistributedWekaException
public Classifier getClassifier()
getClassifier
in interface ClassifierProducer
public void stopJob()
stopJob
in class distributed.core.DistributedJob
public Instances getTrainingHeader()
getTrainingHeader
in interface ClassifierProducer
public static void main(java.lang.String[] args)
public void run(java.lang.Object toRun, java.lang.String[] args) throws java.lang.IllegalArgumentException
run
in interface CommandlineRunnable
run
in class distributed.core.DistributedJob
java.lang.IllegalArgumentException