public class RandomizedDataChunkHadoopJob extends HadoopJob implements CommandlineRunnable
COLT_JAR, DISTRIBUTED_WEKA_BASE_JAR, DISTRIBUTED_WEKA_HADOOP_JAR, JCOMMON_JAR, JFREECHART_JAR, LA4J_JAR, OPEN_CSV_JAR
Constructor and Description |
---|
RandomizedDataChunkHadoopJob() |
Modifier and Type | Method and Description |
---|---|
java.lang.String |
classAttributeTipText()
Tip text for this property
|
java.lang.String |
cleanOutputDirectoryTipText()
Tip text for this property
|
java.lang.String |
getClassAttribute()
Get the name or index of the class attribute ("first" and "last" can also
be used)
|
boolean |
getCleanOutputDirectory()
Get whether to blow away the output directory before running.
|
java.lang.String |
getCSVMapTaskOptions()
Get the options to the header job
|
boolean |
getDontDefaultToLastAttIfClassNotSpecified()
Non-command line option to allow clients to turn off the default behavior
of defaulting to setting the last attribute as the class if not explicitly
specified.
|
java.lang.String[] |
getJobOptionsOnly()
Get the options for this job only
|
java.lang.String |
getNumInstancesPerRandomizedDataChunk()
Get the number of instances that each randomly shuffled data chunk should
have.
|
java.lang.String |
getNumRandomizedDataChunks()
Get the number of randomly shuffled data chunks to create.
|
java.lang.String[] |
getOptions() |
java.lang.String |
getRandomizedChunkOutputPath()
Get the path to the output directory for this job
|
java.lang.String |
getRandomSeed()
Get the random seed for shuffling the data
|
java.lang.String |
globalInfo()
Help information
|
java.util.Enumeration<Option> |
listOptions() |
static void |
main(java.lang.String[] args) |
java.lang.String |
numInstancesPerRandomizedDataChunkTipText()
Tip text for this property
|
java.lang.String |
numRandomizedDataChunksTipText()
Tip text for this property
|
java.lang.String |
randomSeedTipText()
Tip text for this property
|
void |
run(java.lang.Object toRun,
java.lang.String[] options) |
boolean |
runJob() |
void |
setClassAttribute(java.lang.String c)
Set the name or index of the class attribute ("first" and "last" can also
be used)
|
void |
setCleanOutputDirectory(boolean clean)
Set whether to blow away the output directory before running.
|
void |
setCSVMapTaskOptions(java.lang.String opts)
Set the options to the header job
|
void |
setDontDefaultToLastAttIfClassNotSpecified(boolean d)
Non-command line option to allow clients to turn off the default behavior
of defaulting to setting the last attribute as the class if not explicitly
specified.
|
void |
setNumInstancesPerRandomizedDataChunk(java.lang.String insts)
Set the number of instances that each randomly shuffled data chunk should
have.
|
void |
setNumRandomizedDataChunks(java.lang.String chunks)
Set the number of randomly shuffled data chunks to create.
|
void |
setOptions(java.lang.String[] options) |
void |
setRandomSeed(java.lang.String seed)
Set the random seed for shuffling the data
|
additionalWekaPackagesTipText, cleanOutputDirectory, deubgTipText, getAdditionalWekaPackages, getBaseOptionsOnly, getDebug, getLoggingInterval, getMapNumber, getMapReduceJobConfig, getMapReduceNumber, getPathToWekaJar, getReduceNumber, loggingIntervalTipText, pathToWekaJarTipText, setAdditionalWekaPackages, setDebug, setLoggingInterval, setMapReduceJobConfig, setPathToWekaJar
public java.lang.String globalInfo()
public void setCSVMapTaskOptions(java.lang.String opts)
opts
- options to the header job
public java.lang.String getCSVMapTaskOptions()
public java.lang.String numRandomizedDataChunksTipText()
public void setDontDefaultToLastAttIfClassNotSpecified(boolean d)
d
- true if the class is not to be set to the last attribute if the
user has not explicitly specified a class
public boolean getDontDefaultToLastAttIfClassNotSpecified()
public void setNumRandomizedDataChunks(java.lang.String chunks)
chunks
- the number of chunks to create.
public java.lang.String getNumRandomizedDataChunks()
public java.lang.String numInstancesPerRandomizedDataChunkTipText()
public void setNumInstancesPerRandomizedDataChunk(java.lang.String insts)
insts
- the number of instances that each randomly shuffled data chunk
should contain
public java.lang.String getNumInstancesPerRandomizedDataChunk()
public java.lang.String classAttributeTipText()
public void setClassAttribute(java.lang.String c)
c
- the name or index of the class attribute
public java.lang.String getClassAttribute()
public java.lang.String randomSeedTipText()
public void setRandomSeed(java.lang.String seed)
seed
- the random seed to use
public java.lang.String getRandomSeed()
public java.lang.String cleanOutputDirectoryTipText()
public void setCleanOutputDirectory(boolean clean)
clean
- true if the output directory should be deleted before running
(thus forcing the job to run even if there was a populated output
directory already).
public boolean getCleanOutputDirectory()
public java.util.Enumeration<Option> listOptions()
listOptions
in interface OptionHandler
listOptions
in class HadoopJob
public void setOptions(java.lang.String[] options) throws java.lang.Exception
setOptions
in interface OptionHandler
setOptions
in class HadoopJob
java.lang.Exception
public java.lang.String[] getOptions()
getOptions
in interface OptionHandler
getOptions
in class HadoopJob
public java.lang.String[] getJobOptionsOnly()
public boolean runJob() throws weka.distributed.DistributedWekaException
runJob
in class distributed.core.DistributedJob
weka.distributed.DistributedWekaException
public java.lang.String getRandomizedChunkOutputPath()
public void run(java.lang.Object toRun, java.lang.String[] options) throws java.lang.IllegalArgumentException
run
in interface CommandlineRunnable
java.lang.IllegalArgumentException
public static void main(java.lang.String[] args)