public class RandomizedDataChunkHadoopJob extends HadoopJob implements CommandlineRunnable
COLT_JAR, DISTRIBUTED_WEKA_BASE_JAR, DISTRIBUTED_WEKA_HADOOP_JAR, JCOMMON_JAR, JFREECHART_JAR, LA4J_JAR, OPEN_CSV_JAR

| Constructor and Description |
|---|
RandomizedDataChunkHadoopJob() |
| Modifier and Type | Method and Description |
|---|---|
java.lang.String |
classAttributeTipText()
Tip text for this property
|
java.lang.String |
cleanOutputDirectoryTipText()
Tip text for this property
|
java.lang.String |
getClassAttribute()
Get the name or index of the class attribute ("first" and "last" can also
be used)
|
boolean |
getCleanOutputDirectory()
Get whether to blow away the output directory before running.
|
java.lang.String |
getCSVMapTaskOptions()
Get the options to the header job
|
boolean |
getDontDefaultToLastAttIfClassNotSpecified()
Non-command line option to allow clients to turn off the default behavior
of defaulting to setting the last attribute as the class if not explicitly
specified.
|
java.lang.String[] |
getJobOptionsOnly()
Get the options for this job only
|
java.lang.String |
getNumInstancesPerRandomizedDataChunk()
Get the number of instances that each randomly shuffled data chunk should
have.
|
java.lang.String |
getNumRandomizedDataChunks()
Get the number of randomly shuffled data chunks to create.
|
java.lang.String[] |
getOptions() |
java.lang.String |
getRandomizedChunkOutputPath()
Get the path to the output directory for this job
|
java.lang.String |
getRandomSeed()
Get the random seed for shuffling the data
|
java.lang.String |
globalInfo()
Help information
|
java.util.Enumeration<Option> |
listOptions() |
static void |
main(java.lang.String[] args) |
java.lang.String |
numInstancesPerRandomizedDataChunkTipText()
Tip text for this property
|
java.lang.String |
numRandomizedDataChunksTipText()
Tip text for this property
|
java.lang.String |
randomSeedTipText()
Tip text for this property
|
void |
run(java.lang.Object toRun,
java.lang.String[] options) |
boolean |
runJob() |
void |
setClassAttribute(java.lang.String c)
Set the name or index of the class attribute ("first" and "last" can also
be used)
|
void |
setCleanOutputDirectory(boolean clean)
Set whether to blow away the output directory before running.
|
void |
setCSVMapTaskOptions(java.lang.String opts)
Set the options to the header job
|
void |
setDontDefaultToLastAttIfClassNotSpecified(boolean d)
Non-command line option to allow clients to turn off the default behavior
of defaulting to setting the last attribute as the class if not explicitly
specified.
|
void |
setNumInstancesPerRandomizedDataChunk(java.lang.String insts)
Set the number of instances that each randomly shuffled data chunk should
have.
|
void |
setNumRandomizedDataChunks(java.lang.String chunks)
Set the number of randomly shuffled data chunks to create.
|
void |
setOptions(java.lang.String[] options) |
void |
setRandomSeed(java.lang.String seed)
Set the random seed for shuffling the data
|
additionalWekaPackagesTipText, cleanOutputDirectory, deubgTipText, getAdditionalWekaPackages, getBaseOptionsOnly, getDebug, getLoggingInterval, getMapNumber, getMapReduceJobConfig, getMapReduceNumber, getPathToWekaJar, getReduceNumber, loggingIntervalTipText, pathToWekaJarTipText, setAdditionalWekaPackages, setDebug, setLoggingInterval, setMapReduceJobConfig, setPathToWekaJar

public java.lang.String globalInfo()
public void setCSVMapTaskOptions(java.lang.String opts)
opts - options to the header job
public java.lang.String getCSVMapTaskOptions()
public java.lang.String numRandomizedDataChunksTipText()
public void setDontDefaultToLastAttIfClassNotSpecified(boolean d)
d - true if the class is not to be set to the last attribute if the
user has not specifically specified a class
public boolean getDontDefaultToLastAttIfClassNotSpecified()
public void setNumRandomizedDataChunks(java.lang.String chunks)
chunks - the number of chunks to create.
public java.lang.String getNumRandomizedDataChunks()
public java.lang.String numInstancesPerRandomizedDataChunkTipText()
public void setNumInstancesPerRandomizedDataChunk(java.lang.String insts)
insts - the number of instances that each randomly shuffled data chunk
should contain
public java.lang.String getNumInstancesPerRandomizedDataChunk()
public java.lang.String classAttributeTipText()
public void setClassAttribute(java.lang.String c)
c - the name or index of the class attribute
public java.lang.String getClassAttribute()
public java.lang.String randomSeedTipText()
public void setRandomSeed(java.lang.String seed)
seed - the random seed to use
public java.lang.String getRandomSeed()
public java.lang.String cleanOutputDirectoryTipText()
public void setCleanOutputDirectory(boolean clean)
clean - true if the output directory should be deleted first
(thus forcing the job to run if there was a populated output
directory already).
public boolean getCleanOutputDirectory()
public java.util.Enumeration<Option> listOptions()
listOptions in interface OptionHandler
listOptions in class HadoopJob
public void setOptions(java.lang.String[] options)
throws java.lang.Exception
setOptions in interface OptionHandler
setOptions in class HadoopJob
Throws: java.lang.Exception
public java.lang.String[] getOptions()
getOptions in interface OptionHandler
getOptions in class HadoopJob
public java.lang.String[] getJobOptionsOnly()
public boolean runJob()
throws weka.distributed.DistributedWekaException
runJob in class distributed.core.DistributedJob
Throws: weka.distributed.DistributedWekaException
public java.lang.String getRandomizedChunkOutputPath()
public void run(java.lang.Object toRun,
java.lang.String[] options)
throws java.lang.IllegalArgumentException
run in interface CommandlineRunnable
Throws: java.lang.IllegalArgumentException
public static void main(java.lang.String[] args)