public class KMeansClustererHadoopJob extends HadoopJob implements CommandlineRunnable, TextProducer, ClustererProducer
COLT_JAR, DISTRIBUTED_WEKA_BASE_JAR, DISTRIBUTED_WEKA_HADOOP_JAR, JCOMMON_JAR, JFREECHART_JAR, LA4J_JAR, OPEN_CSV_JAR, TDIGEST_JAR
Constructor and Description |
---|
KMeansClustererHadoopJob()
Constructor
|
Modifier and Type | Method and Description |
---|---|
java.lang.String |
convergenceToleranceTipText()
Tip text for this property
|
java.lang.String |
displayCentroidStdDevsTipText()
Tip text for this property
|
Clusterer |
getClusterer() |
double |
getConvergenceTolerance()
Get the convergence tolerance
|
java.lang.String |
getCSVMapTaskOptions()
Get the options to the header job
|
boolean |
getDisplayCentroidStdDevs()
Get whether to display the standard deviations of centroids in textual
output of the model
|
boolean |
getInitWithRandomCentroids()
Get whether to initialize with randomly selected centroids rather than
using the k-means|| initialization procedure.
|
java.lang.String[] |
getJobOptionsOnly()
Get the options for this job only
|
java.lang.String |
getKMeansParallelInitSteps()
Get the number of iterations of the k-means|| initialization routine to
perform
|
java.lang.String |
getModelFileName()
Get the name only for the model file
|
java.lang.String |
getNumClusters()
Get the number of clusters to find
|
java.lang.String |
getNumIterations()
Get the maximum number of k-means iterations to perform
|
java.lang.String |
getNumNodesInCluster()
Get the number of nodes in the Hadoop cluster
|
java.lang.String |
getNumRuns()
Get the number of k-means runs to perform in parallel
|
java.lang.String[] |
getOptions() |
java.lang.String |
getRandomizedJobOptions()
Get the options for the randomize/stratify task
|
boolean |
getRandomlyShuffleData()
Get whether to randomly shuffle the order of the instances in the input
data before clustering
|
java.lang.String |
getRandomlyShuffleDataNumChunks()
Get the number of randomly shuffled data chunks to create.
|
java.lang.String |
getRandomSeed()
Get the seed for random number generation
|
java.lang.String |
getText() |
Instances |
getTrainingHeader() |
java.lang.String |
globalInfo()
Help information
|
java.lang.String |
initWithRandomCentroidsTipText()
Tip text for this property
|
java.lang.String |
kMeansParallelInitStepsTipText()
Tip text for this property.
|
java.util.Enumeration<Option> |
listOptions() |
static void |
main(java.lang.String[] args)
Main method for executing this job from the command line
|
java.lang.String |
modelFileNameTipText()
Tip text for this property
|
java.lang.String |
numClustersTipText()
Tip text for this property.
|
java.lang.String |
numIterationsTipText()
Tip text for this property.
|
java.lang.String |
numNodesInClusterTipText()
Tip text for this property
|
java.lang.String |
numRunsTipText()
Tip text for this property.
|
java.lang.String |
randomlyShuffleDataNumChunksTipText()
Tip text for this property
|
java.lang.String |
randomlyShuffleDataTipText()
Tip text for this property
|
java.lang.String |
randomSeedTipText()
Tip text for this property.
|
void |
run(java.lang.Object toRun,
java.lang.String[] args) |
boolean |
runJob() |
void |
setConvergenceTolerance(double tol)
Set the convergence tolerance
|
void |
setCSVMapTaskOptions(java.lang.String opts)
Set the options to the header job
|
void |
setDisplayCentroidStdDevs(boolean d)
Set whether to display the standard deviations of centroids in textual
output of the model
|
void |
setInitWithRandomCentroids(boolean init)
Set whether to initialize with randomly selected centroids rather than
using the k-means|| initialization procedure.
|
void |
setKMeansParallelInitSteps(java.lang.String steps)
Set the number of iterations of the k-means|| initialization routine to
perform
|
void |
setModelFileName(java.lang.String m)
Set the name only for the model file
|
void |
setNumClusters(java.lang.String numClusters)
Set the number of clusters to find
|
void |
setNumIterations(java.lang.String numIts)
Set the maximum number of k-means iterations to perform
|
void |
setNumNodesInCluster(java.lang.String n)
Set the number of nodes in the Hadoop cluster
|
void |
setNumRuns(java.lang.String numRuns)
Set the number of k-means runs to perform in parallel
|
void |
setOptions(java.lang.String[] options) |
void |
setRandomizeJobOptions(java.lang.String opts)
Set the options for the randomize/stratify task
|
void |
setRandomlyShuffleData(boolean r)
Set whether to randomly shuffle the order of the instances in the input
data before clustering
|
void |
setRandomlyShuffleDataNumChunks(java.lang.String chunks)
Set the number of randomly shuffled data chunks to create.
|
void |
setRandomSeed(java.lang.String seed)
Set the seed for random number generation
|
void |
stopJob() |
additionalWekaPackagesTipText, cleanOutputDirectory, deubgTipText, getAdditionalWekaPackages, getBaseOptionsOnly, getDebug, getLoggingInterval, getMapNumber, getMapReduceJobConfig, getMapReduceNumber, getPathToWekaJar, getReduceNumber, loggingIntervalTipText, logMessage, pathToWekaJarTipText, setAdditionalWekaPackages, setDebug, setLoggingInterval, setMapReduceJobConfig, setPathToWekaJar
environmentSubstitute, getAdditionalWekaPackageNames, getJobName, getJobStatus, getLog, logMessage, logMessage, makeOptionsStr, objectRowToInstance, parseInstance, postExecution, preExecution, setEnvironment, setJobDescription, setJobName, setJobStatus, setLog, setStatusMessagePrefix, stackTraceToString, statusMessage
equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
postExecution, preExecution
public java.lang.String globalInfo()
public java.lang.String convergenceToleranceTipText()
public void setConvergenceTolerance(double tol)
tol
- the convergence tolerance
public double getConvergenceTolerance()
public java.lang.String initWithRandomCentroidsTipText()
public void setInitWithRandomCentroids(boolean init)
init
- true if randomly selected initial centroids are to be used
public boolean getInitWithRandomCentroids()
public java.lang.String numNodesInClusterTipText()
public void setNumNodesInCluster(java.lang.String n)
n
- the number of nodes in the Hadoop cluster
public java.lang.String getNumNodesInCluster()
public java.lang.String randomlyShuffleDataNumChunksTipText()
public void setRandomlyShuffleDataNumChunks(java.lang.String chunks)
chunks
- the number of chunks to create.
public java.lang.String getRandomlyShuffleDataNumChunks()
public java.lang.String modelFileNameTipText()
public void setModelFileName(java.lang.String m)
m
- the name only (not full path) that the model should be saved to
public java.lang.String getModelFileName()
public java.lang.String randomlyShuffleDataTipText()
public void setRandomlyShuffleData(boolean r)
r
- true if the data should be randomly shuffled
public boolean getRandomlyShuffleData()
public java.lang.String numClustersTipText()
public void setNumClusters(java.lang.String numClusters)
numClusters
- the number of clusters to find
public java.lang.String getNumClusters()
public java.lang.String numRunsTipText()
public void setNumRuns(java.lang.String numRuns)
numRuns
- the number of k-means runs to perform in parallel
public java.lang.String getNumRuns()
public java.lang.String numIterationsTipText()
public void setNumIterations(java.lang.String numIts)
numIts
- the maximum number of iterations to perform
public java.lang.String getNumIterations()
public java.lang.String randomSeedTipText()
public void setRandomSeed(java.lang.String seed)
seed
- the seed for the random number generator
public java.lang.String getRandomSeed()
public java.lang.String kMeansParallelInitStepsTipText()
public void setKMeansParallelInitSteps(java.lang.String steps)
steps
- the number of iterations of the k-means|| init routine to
perform
public java.lang.String getKMeansParallelInitSteps()
public void setRandomizeJobOptions(java.lang.String opts)
opts
- the options for the randomize task
public java.lang.String getRandomizedJobOptions()
public java.lang.String getCSVMapTaskOptions()
public void setCSVMapTaskOptions(java.lang.String opts)
opts
- options to the header job
public java.lang.String displayCentroidStdDevsTipText()
public void setDisplayCentroidStdDevs(boolean d)
d
- true if standard deviations are to be displayed
public boolean getDisplayCentroidStdDevs()
public java.util.Enumeration<Option> listOptions()
listOptions
in interface OptionHandler
listOptions
in class HadoopJob
public void setOptions(java.lang.String[] options) throws java.lang.Exception
setOptions
in interface OptionHandler
setOptions
in class HadoopJob
java.lang.Exception
public java.lang.String[] getJobOptionsOnly()
public java.lang.String[] getOptions()
getOptions
in interface OptionHandler
getOptions
in class HadoopJob
public boolean runJob() throws weka.distributed.DistributedWekaException
runJob
in class distributed.core.DistributedJob
weka.distributed.DistributedWekaException
public Clusterer getClusterer()
getClusterer
in interface ClustererProducer
public Instances getTrainingHeader()
getTrainingHeader
in interface ClustererProducer
public java.lang.String getText()
getText
in interface TextProducer
public void stopJob()
stopJob
in class distributed.core.DistributedJob
public static void main(java.lang.String[] args)
args
- arguments to the job
public void run(java.lang.Object toRun, java.lang.String[] args)
run
in interface CommandlineRunnable
run
in class distributed.core.DistributedJob