public class KMeansClustererHadoopJob extends HadoopJob implements CommandlineRunnable, TextProducer, ClustererProducer
COLT_JAR, DISTRIBUTED_WEKA_BASE_JAR, DISTRIBUTED_WEKA_HADOOP_JAR, JCOMMON_JAR, JFREECHART_JAR, LA4J_JAR, OPEN_CSV_JAR
| Constructor and Description |
|---|
KMeansClustererHadoopJob()
Constructor
|
| Modifier and Type | Method and Description |
|---|---|
java.lang.String |
convergenceToleranceTipText()
Tip text for this property
|
java.lang.String |
displayCentroidStdDevsTipText()
Tip text for this property
|
Clusterer |
getClusterer() |
double |
getConvergenceTolerance()
Get the convergence tolerance
|
java.lang.String |
getCSVMapTaskOptions()
Get the options to the header job
|
boolean |
getDisplayCentroidStdDevs()
Get whether to display the standard deviations of centroids in textual
output of the model
|
boolean |
getInitWithRandomCentroids()
Get whether to initialize with randomly selected centroids rather than
using the k-means|| initialization procedure.
|
java.lang.String[] |
getJobOptionsOnly()
Get the options for this job only
|
java.lang.String |
getKMeansParallelInitSteps()
Get the number of iterations of the k-means|| initialization routine to
perform
|
java.lang.String |
getModelFileName()
Get the name only for the model file
|
java.lang.String |
getNumClusters()
Get the number of clusters to find
|
java.lang.String |
getNumIterations()
Get the maximum number of k-means iterations to perform
|
java.lang.String |
getNumNodesInCluster()
Get the number of nodes in the Hadoop cluster
|
java.lang.String |
getNumRuns()
Get the number of k-means runs to perform in parallel
|
java.lang.String[] |
getOptions() |
java.lang.String |
getRandomizedJobOptions()
Get the options for the randomize/stratify task
|
boolean |
getRandomlyShuffleData()
Get whether to randomly shuffle the order of the instances in the input
data before clustering
|
java.lang.String |
getRandomlyShuffleDataNumChunks()
Get the number of randomly shuffled data chunks to create.
|
java.lang.String |
getRandomSeed()
Get the seed for random number generation
|
java.lang.String |
getText() |
Instances |
getTrainingHeader() |
java.lang.String |
globalInfo()
Help information
|
java.lang.String |
initWithRandomCentroidsTipText()
Tip text for this property
|
java.lang.String |
kMeansParallelInitStepsTipText()
Tip text for this property.
|
java.util.Enumeration<Option> |
listOptions() |
static void |
main(java.lang.String[] args)
Main method for executing this job from the command line
|
java.lang.String |
modelFileNameTipText()
Tip text for this property
|
java.lang.String |
numClustersTipText()
Tip text for this property.
|
java.lang.String |
numIterationsTipText()
Tip text for this property.
|
java.lang.String |
numNodesInClusterTipText()
Tip text for this property
|
java.lang.String |
numRunsTipText()
Tip text for this property.
|
java.lang.String |
randomlyShuffleDataNumChunksTipText()
Tip text for this property
|
java.lang.String |
randomlyShuffleDataTipText()
Tip text for this property
|
java.lang.String |
randomSeedTipText()
Tip text for this property.
|
void |
run(java.lang.Object toRun,
java.lang.String[] args) |
boolean |
runJob() |
void |
setConvergenceTolerance(double tol)
Set the convergence tolerance
|
void |
setCSVMapTaskOptions(java.lang.String opts)
Set the options to the header job
|
void |
setDisplayCentroidStdDevs(boolean d)
Set whether to display the standard deviations of centroids in textual
output of the model
|
void |
setInitWithRandomCentroids(boolean init)
Set whether to initialize with randomly selected centroids rather than
using the k-means|| initialization procedure.
|
void |
setKMeansParallelInitSteps(java.lang.String steps)
Set the number of iterations of the k-means|| initialization routine to
perform
|
void |
setModelFileName(java.lang.String m)
Set the name only for the model file
|
void |
setNumClusters(java.lang.String numClusters)
Set the number of clusters to find
|
void |
setNumIterations(java.lang.String numIts)
Set the maximum number of k-means iterations to perform
|
void |
setNumNodesInCluster(java.lang.String n)
Set the number of nodes in the Hadoop cluster
|
void |
setNumRuns(java.lang.String numRuns)
Set the number of k-means runs to perform in parallel
|
void |
setOptions(java.lang.String[] options) |
void |
setRandomizeJobOptions(java.lang.String opts)
Set the options for the randomize/stratify task
|
void |
setRandomlyShuffleData(boolean r)
Set whether to randomly shuffle the order of the instances in the input
data before clustering
|
void |
setRandomlyShuffleDataNumChunks(java.lang.String chunks)
Set the number of randomly shuffled data chunks to create.
|
void |
setRandomSeed(java.lang.String seed)
Set the seed for random number generation
|
void |
stopJob() |
additionalWekaPackagesTipText, cleanOutputDirectory, deubgTipText, getAdditionalWekaPackages, getBaseOptionsOnly, getDebug, getLoggingInterval, getMapNumber, getMapReduceJobConfig, getMapReduceNumber, getPathToWekaJar, getReduceNumber, loggingIntervalTipText, pathToWekaJarTipText, setAdditionalWekaPackages, setDebug, setLoggingInterval, setMapReduceJobConfig, setPathToWekaJar
public java.lang.String globalInfo()
public java.lang.String convergenceToleranceTipText()
public void setConvergenceTolerance(double tol)
tol - the convergence tolerance
public double getConvergenceTolerance()
public java.lang.String initWithRandomCentroidsTipText()
public void setInitWithRandomCentroids(boolean init)
init - true if randomly selected initial centroids are to be used
public boolean getInitWithRandomCentroids()
public java.lang.String numNodesInClusterTipText()
public void setNumNodesInCluster(java.lang.String n)
n - the number of nodes in the Hadoop cluster
public java.lang.String getNumNodesInCluster()
public java.lang.String randomlyShuffleDataNumChunksTipText()
public void setRandomlyShuffleDataNumChunks(java.lang.String chunks)
chunks - the number of chunks to create.
public java.lang.String getRandomlyShuffleDataNumChunks()
public java.lang.String modelFileNameTipText()
public void setModelFileName(java.lang.String m)
m - the name only (not full path) that the model should be saved to
public java.lang.String getModelFileName()
public java.lang.String randomlyShuffleDataTipText()
public void setRandomlyShuffleData(boolean r)
r - true if the data should be randomly shuffled
public boolean getRandomlyShuffleData()
public java.lang.String numClustersTipText()
public void setNumClusters(java.lang.String numClusters)
numClusters - the number of clusters to find
public java.lang.String getNumClusters()
public java.lang.String numRunsTipText()
public void setNumRuns(java.lang.String numRuns)
numRuns - the number of k-means runs to perform in parallel
public java.lang.String getNumRuns()
public java.lang.String numIterationsTipText()
public void setNumIterations(java.lang.String numIts)
numIts - the maximum number of iterations to perform
public java.lang.String getNumIterations()
public java.lang.String randomSeedTipText()
public void setRandomSeed(java.lang.String seed)
seed - the seed for the random number generator
public java.lang.String getRandomSeed()
public java.lang.String kMeansParallelInitStepsTipText()
public void setKMeansParallelInitSteps(java.lang.String steps)
steps - the number of iterations of the k-means|| init routine to
perform
public java.lang.String getKMeansParallelInitSteps()
public void setRandomizeJobOptions(java.lang.String opts)
opts - the options for the randomize task
public java.lang.String getRandomizedJobOptions()
public java.lang.String getCSVMapTaskOptions()
public void setCSVMapTaskOptions(java.lang.String opts)
opts - options to the header job
public java.lang.String displayCentroidStdDevsTipText()
public void setDisplayCentroidStdDevs(boolean d)
d - true if standard deviations are to be displayed
public boolean getDisplayCentroidStdDevs()
public java.util.Enumeration<Option> listOptions()
listOptions in interface OptionHandler
listOptions in class HadoopJob
public void setOptions(java.lang.String[] options)
throws java.lang.Exception
setOptions in interface OptionHandler
setOptions in class HadoopJob
java.lang.Exception
public java.lang.String[] getJobOptionsOnly()
public java.lang.String[] getOptions()
getOptions in interface OptionHandler
getOptions in class HadoopJob
public boolean runJob()
throws weka.distributed.DistributedWekaException
runJob in class distributed.core.DistributedJob
weka.distributed.DistributedWekaException
public Clusterer getClusterer()
getClusterer in interface ClustererProducer
public Instances getTrainingHeader()
getTrainingHeader in interface ClustererProducer
public java.lang.String getText()
getText in interface TextProducer
public void stopJob()
stopJob in class distributed.core.DistributedJob
public static void main(java.lang.String[] args)
args - arguments to the job
public void run(java.lang.Object toRun,
java.lang.String[] args)
run in interface CommandlineRunnable