public class RandomizedDataChunkHadoopJob extends HadoopJob implements CommandlineRunnable
COLT_JAR, DISTRIBUTED_WEKA_BASE_JAR, DISTRIBUTED_WEKA_HADOOP_JAR, JCOMMON_JAR, JFREECHART_JAR, LA4J_JAR, OPEN_CSV_JAR, TDIGEST_JAR
Constructor and Description |
---|
RandomizedDataChunkHadoopJob() |
Modifier and Type | Method and Description |
---|---|
java.lang.String |
classAttributeTipText()
Tip text for this property
|
java.lang.String |
cleanOutputDirectoryTipText()
Tip text for this property
|
java.lang.String |
getClassAttribute()
Get the name or index of the class attribute ("first" and "last" can also
be used)
|
boolean |
getCleanOutputDirectory()
Get whether to blow away the output directory before running.
|
java.lang.String |
getCSVMapTaskOptions()
Get the options to the header job
|
boolean |
getDontDefaultToLastAttIfClassNotSpecified()
Non-command line option to allow clients to turn off the default behavior
of defaulting to setting the last attribute as the class if not explicitly
specified.
|
java.lang.String[] |
getJobOptionsOnly()
Get the options for this job only
|
java.lang.String |
getNumInstancesPerRandomizedDataChunk()
Get the number of instances that each randomly shuffled data chunk should
have.
|
java.lang.String |
getNumRandomizedDataChunks()
Get the number of randomly shuffled data chunks to create.
|
java.lang.String[] |
getOptions() |
java.lang.String |
getRandomizedChunkOutputPath()
Get the path to the output directory for this job
|
java.lang.String |
getRandomSeed()
Get the random seed for shuffling the data
|
java.lang.String |
globalInfo()
Help information
|
java.util.Enumeration<Option> |
listOptions() |
static void |
main(java.lang.String[] args) |
java.lang.String |
numInstancesPerRandomizedDataChunkTipText()
Tip text for this property
|
java.lang.String |
numRandomizedDataChunksTipText()
Tip text for this property
|
java.lang.String |
randomSeedTipText()
Tip text for this property
|
void |
run(java.lang.Object toRun,
java.lang.String[] options) |
boolean |
runJob() |
void |
setClassAttribute(java.lang.String c)
Set the name or index of the class attribute ("first" and "last" can also
be used)
|
void |
setCleanOutputDirectory(boolean clean)
Set whether to blow away the output directory before running.
|
void |
setCSVMapTaskOptions(java.lang.String opts)
Set the options to the header job
|
void |
setDontDefaultToLastAttIfClassNotSpecified(boolean d)
Non-command line option to allow clients to turn off the default behavior
of defaulting to setting the last attribute as the class if not explicitly
specified.
|
void |
setNumInstancesPerRandomizedDataChunk(java.lang.String insts)
Set the number of instances that each randomly shuffled data chunk should
have.
|
void |
setNumRandomizedDataChunks(java.lang.String chunks)
Set the number of randomly shuffled data chunks to create.
|
void |
setOptions(java.lang.String[] options) |
void |
setRandomSeed(java.lang.String seed)
Set the random seed for shuffling the data
|
additionalWekaPackagesTipText, cleanOutputDirectory, deubgTipText, getAdditionalWekaPackages, getBaseOptionsOnly, getDebug, getLoggingInterval, getMapNumber, getMapReduceJobConfig, getMapReduceNumber, getPathToWekaJar, getReduceNumber, loggingIntervalTipText, logMessage, pathToWekaJarTipText, setAdditionalWekaPackages, setDebug, setLoggingInterval, setMapReduceJobConfig, setPathToWekaJar
environmentSubstitute, getAdditionalWekaPackageNames, getJobName, getJobStatus, getLog, logMessage, logMessage, makeOptionsStr, objectRowToInstance, parseInstance, postExecution, preExecution, setEnvironment, setJobDescription, setJobName, setJobStatus, setLog, setStatusMessagePrefix, stackTraceToString, statusMessage, stopJob
equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
postExecution, preExecution
public java.lang.String globalInfo()
public void setCSVMapTaskOptions(java.lang.String opts)
opts
- options to the header job
public java.lang.String getCSVMapTaskOptions()
public java.lang.String numRandomizedDataChunksTipText()
public void setDontDefaultToLastAttIfClassNotSpecified(boolean d)
d
- true if the class is not to be set to the last attribute if the
user has not explicitly specified a class
public boolean getDontDefaultToLastAttIfClassNotSpecified()
public void setNumRandomizedDataChunks(java.lang.String chunks)
chunks
- the number of chunks to create.
public java.lang.String getNumRandomizedDataChunks()
public java.lang.String numInstancesPerRandomizedDataChunkTipText()
public void setNumInstancesPerRandomizedDataChunk(java.lang.String insts)
insts
- the number of instances that each randomly shuffled data chunk
should contain
public java.lang.String getNumInstancesPerRandomizedDataChunk()
public java.lang.String classAttributeTipText()
public void setClassAttribute(java.lang.String c)
c
- the name or index of the class attribute
public java.lang.String getClassAttribute()
public java.lang.String randomSeedTipText()
public void setRandomSeed(java.lang.String seed)
seed
- the random seed to use
public java.lang.String getRandomSeed()
public java.lang.String cleanOutputDirectoryTipText()
public void setCleanOutputDirectory(boolean clean)
clean
- true if the output directory should be deleted first
(thus forcing the job to run even if there was already a populated
output directory).
public boolean getCleanOutputDirectory()
public java.util.Enumeration<Option> listOptions()
listOptions
in interface OptionHandler
listOptions
in class HadoopJob
public void setOptions(java.lang.String[] options) throws java.lang.Exception
setOptions
in interface OptionHandler
setOptions
in class HadoopJob
java.lang.Exception
public java.lang.String[] getOptions()
getOptions
in interface OptionHandler
getOptions
in class HadoopJob
public java.lang.String[] getJobOptionsOnly()
public boolean runJob() throws weka.distributed.DistributedWekaException
runJob
in class distributed.core.DistributedJob
weka.distributed.DistributedWekaException
public java.lang.String getRandomizedChunkOutputPath()
public void run(java.lang.Object toRun, java.lang.String[] options) throws java.lang.IllegalArgumentException
run
in interface CommandlineRunnable
run
in class distributed.core.DistributedJob
java.lang.IllegalArgumentException
public static void main(java.lang.String[] args)