public class CanopyClustererSparkJob extends SparkJob implements CommandlineRunnable
SparkJob.NoKeyTextOutputFormat<K,V>
TEST_DATA, TRAINING_DATA
Constructor and Description |
---|
CanopyClustererSparkJob()
Constructor
|
Modifier and Type | Method and Description |
---|---|
java.lang.String |
assignCanopiesToTrainingDataTipText()
Tip text for this property
|
boolean |
getAssignCanopiesToTrainingData()
Get whether to assign canopies to the training data
|
java.lang.String |
getCanopyMapTaskOptions() |
java.lang.String |
getCSVMapTaskOptions()
Get the options to the header job
|
org.apache.spark.api.java.JavaRDD<InstanceWithCanopyAssignments> |
getDataWithCanopiesAssigned() |
java.lang.String |
getMaxNumCanopiesReducePhase()
Get the maximum number of canopies to form in the reduce phase
|
java.lang.String |
getModelFileName()
Get the name only for the model file
|
java.lang.String[] |
getOptions() |
boolean |
getRandomizeAndStratify()
Get whether to randomize (and stratify) the input data or not
|
java.lang.String |
getRandomizedJobOptions()
Get the options for the randomize/stratify task
|
java.lang.String |
getT1ReducePhase()
Get the T1 distance to use in the reduce phase
|
java.lang.String |
getT2ReducePhase()
Get the T2 distance to use in the reduce phase
|
java.util.Enumeration<Option> |
listOptions() |
static void |
main(java.lang.String[] args) |
java.lang.String |
maxNumCanopiesReducePhaseTipText()
Tip text for this property
|
java.lang.String |
modelFileNameTipText()
Tip text for this property
|
void |
run(java.lang.Object toRun,
java.lang.String[] options) |
boolean |
runJobWithContext(org.apache.spark.api.java.JavaSparkContext sparkContext)
Clients to implement
|
void |
setAssignCanopiesToTrainingData(boolean assign)
Set whether to assign canopies to the training data
|
void |
setCanopyMapTaskOptions(java.lang.String opts) |
void |
setCSVMapTaskOptions(java.lang.String opts)
Set the options to the header job
|
void |
setMaxNumCanopiesReducePhase(java.lang.String max)
Set the maximum number of canopies to form in the reduce phase
|
void |
setModelFileName(java.lang.String m)
Set the name only for the model file
|
void |
setOptions(java.lang.String[] options) |
void |
setRandomizeAndStratify(boolean r)
Set whether to randomize (and stratify) the input data or not
|
void |
setRandomizeJobOptions(java.lang.String opts)
Set the options for the randomize/stratify task
|
void |
setT1ReducePhase(java.lang.String t1)
Set the T1 distance to use in the reduce phase
|
void |
setT2ReducePhase(java.lang.String t2)
Set the T2 distance to use in the reduce phase
|
java.lang.String |
t1ReducePhaseTipText()
Tip text for this property
|
java.lang.String |
t2ReducePhaseTipText()
Tip text for this property
|
addSubdirToPath, checkFileExists, createSparkContextForJob, debugTipText, deleteDirectory, getBaseOptionsOnly, getCachingStrategy, getDataset, getDatasets, getDebug, getFSConfigurationForPath, getSizeInBytesOfPath, getSparkContext, getSparkJobConfig, initJob, initSparkLogAppender, loadCSVFile, loadInput, loadInstanceObjectFile, openFileForRead, openFileForWrite, openTextFileForWrite, removeSparkLogAppender, resolveLocalOrOtherFileSystemPath, runJob, setCachingStrategy, setDataset, setDebug, shutdownJob, stringRDDToInstanceRDD
environmentSubstitute, getAdditionalWekaPackageNames, getJobName, getJobStatus, getLog, logMessage, logMessage, logMessage, makeOptionsStr, objectRowToInstance, parseInstance, postExecution, preExecution, setEnvironment, setJobDescription, setJobName, setJobStatus, setLog, setStatusMessagePrefix, stackTraceToString, statusMessage, stopJob
equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
postExecution, preExecution
public static void main(java.lang.String[] args)
public java.util.Enumeration<Option> listOptions()
listOptions
in interface OptionHandler
listOptions
in class SparkJob
public java.lang.String[] getOptions()
getOptions
in interface OptionHandler
getOptions
in class SparkJob
public void setOptions(java.lang.String[] options) throws java.lang.Exception
setOptions
in interface OptionHandler
setOptions
in class SparkJob
java.lang.Exception
public java.lang.String getCanopyMapTaskOptions()
public void setCanopyMapTaskOptions(java.lang.String opts)
public java.lang.String getCSVMapTaskOptions()
public void setCSVMapTaskOptions(java.lang.String opts)
opts
- options to the header job

public boolean getRandomizeAndStratify()
public void setRandomizeAndStratify(boolean r)
r
- true if the input data is to be randomized and stratified

public java.lang.String modelFileNameTipText()
public java.lang.String getModelFileName()
public void setModelFileName(java.lang.String m)
m
- the name only (not full path) that the model should be saved to

public void setRandomizeJobOptions(java.lang.String opts)
opts
- the options for the randomize task

public java.lang.String getRandomizedJobOptions()
public java.lang.String maxNumCanopiesReducePhaseTipText()
public java.lang.String getMaxNumCanopiesReducePhase()
public void setMaxNumCanopiesReducePhase(java.lang.String max)
max
- the maximum number of canopies to form in the reduce phase

public java.lang.String t1ReducePhaseTipText()
public java.lang.String getT1ReducePhase()
public void setT1ReducePhase(java.lang.String t1)
t1
- the T1 distance to use in the reduce phase

public java.lang.String t2ReducePhaseTipText()
public java.lang.String getT2ReducePhase()
public void setT2ReducePhase(java.lang.String t2)
t2
- the T2 distance to use in the reduce phase

public java.lang.String assignCanopiesToTrainingDataTipText()
public boolean getAssignCanopiesToTrainingData()
public void setAssignCanopiesToTrainingData(boolean assign)
assign
- true if the canopies found are to be assigned to the training
data (thus creating a new RDD)

public boolean runJobWithContext(org.apache.spark.api.java.JavaSparkContext sparkContext) throws java.io.IOException, weka.distributed.DistributedWekaException
SparkJob
runJobWithContext
in class SparkJob
sparkContext
- the context to use

java.io.IOException
- if an IO problem occurs

weka.distributed.DistributedWekaException
- if any other problem occurs

public org.apache.spark.api.java.JavaRDD&lt;InstanceWithCanopyAssignments&gt; getDataWithCanopiesAssigned() throws weka.distributed.DistributedWekaException
public void run(java.lang.Object toRun, java.lang.String[] options) throws java.lang.IllegalArgumentException
run
in interface CommandlineRunnable
run
in class distributed.core.DistributedJob
java.lang.IllegalArgumentException