public class ArffHeaderSparkJob extends SparkJob implements CommandlineRunnable, InstancesProducer, TextProducer
Nested classes/interfaces inherited from class SparkJob:
SparkJob.NoKeyTextOutputFormat<K,V>
Modifier and Type | Field and Description
---|---
static java.lang.String | CHART_HEIGHT_KEY: key for specifying a chart height to use
static java.lang.String | CHART_WIDTH_KEY: key for specifying a chart width to use
static int | DEFAULT_CHART_HEIGHT: default height for charts
static int | DEFAULT_CHART_WIDTH: default width for charts
static java.lang.String | OUTPUT_SUBDIR: subdirectory of the output directory in which the ARFF header is stored
TEST_DATA, TRAINING_DATA
Constructor | Description
---|---
ArffHeaderSparkJob() | Constructor
Modifier and Type | Method and Description
---|---
java.lang.String | attributeNamesFileTipText(): Tip text for this property
java.lang.String | attributeNamesTipText(): Tip text for this property
java.lang.String | csvToArffTaskOptionsTipText(): Tip text for this property
java.lang.String | getAttributeNames(): Get a comma-separated list of attribute names to use when generating the ARFF header
java.lang.String | getAttributeNamesFile(): Get the path to a file containing attribute names to use
java.lang.String | getCsvToArffTaskOptions(): Get the options to pass on to the underlying CSV to ARFF task
Instances | getHeader(): Get the final header
Instances | getInstances(): Get the final header (calls getHeader())
java.lang.String[] | getJobOptionsOnly(): Get the options specific to this job only
java.lang.String[] | getOptions()
java.lang.String | getOutputHeaderFileName(): Get the name of the header file to create in the output directory
java.lang.String | getPathToExistingHeader(): Get the path to a previously created header file to use
java.util.List<java.lang.String> | getSummaryChartAttNames(): Get the names of the attributes in the summary charts
java.util.List<java.awt.image.BufferedImage> | getSummaryCharts(): Get the summary charts (if any)
java.lang.String | getText()
java.util.Enumeration<Option> | listOptions()
static void | main(java.lang.String[] args): Main method for executing this job
java.lang.String | outputHeaderFileNameTipText(): The tip text for this property
java.lang.String | pathToExistingHeaderTipText(): The tip text for this property
void | run(java.lang.Object toRun, java.lang.String[] options)
boolean | runJobWithContext(org.apache.spark.api.java.JavaSparkContext sparkContext): Clients to implement
void | setAttributeNames(java.lang.String names): Set a comma-separated list of attribute names to use when generating the ARFF header
void | setAttributeNamesFile(java.lang.String namesfile): Set the path to a file containing attribute names to use
void | setCsvToArffTaskOptions(java.lang.String opts): Set the options to pass on to the underlying CSV to ARFF task
void | setOptions(java.lang.String[] options)
void | setOutputHeaderFileName(java.lang.String name): Set the name of the header file to create in the output directory
void | setPathToExistingHeader(java.lang.String path): Set the path to a previously created header file to use
Methods inherited from class SparkJob:
addSubdirToPath, checkFileExists, createSparkContextForJob, debugTipText, deleteDirectory, getBaseOptionsOnly, getCachingStrategy, getDataset, getDatasets, getDebug, getFSConfigurationForPath, getSizeInBytesOfPath, getSparkContext, getSparkJobConfig, initJob, initSparkLogAppender, loadCSVFile, loadInput, loadInstanceObjectFile, openFileForRead, openFileForWrite, openTextFileForWrite, removeSparkLogAppender, resolveLocalOrOtherFileSystemPath, runJob, setCachingStrategy, setDataset, setDebug, shutdownJob, stringRDDToInstanceRDD

Methods inherited from class distributed.core.DistributedJob:
environmentSubstitute, getAdditionalWekaPackageNames, getJobName, getJobStatus, getLog, logMessage, logMessage, logMessage, makeOptionsStr, objectRowToInstance, parseInstance, postExecution, preExecution, setEnvironment, setJobDescription, setJobName, setJobStatus, setLog, setStatusMessagePrefix, stackTraceToString, statusMessage, stopJob

Methods inherited from class java.lang.Object:
equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait

Methods inherited from interface CommandlineRunnable:
postExecution, preExecution
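Putting the summarized methods together, the typical programmatic pattern is to configure the job, run it against a JavaSparkContext, and then retrieve the generated header. The sketch below is a minimal, hypothetical example based only on the methods documented on this page: the package name (weka.distributed.spark), Spark master, application name, and attribute names are assumptions, and in practice the input CSV location and other settings are supplied through the job's option string (see setOptions(String[])).

```java
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;

import weka.core.Instances;
import weka.distributed.spark.ArffHeaderSparkJob; // package name assumed

public class ArffHeaderSparkJobExample {

  public static void main(String[] args) throws Exception {
    // Placeholder Spark configuration (master URL and app name are assumptions)
    SparkConf conf =
      new SparkConf().setMaster("local[*]").setAppName("ARFF header demo");
    JavaSparkContext context = new JavaSparkContext(conf);

    try {
      ArffHeaderSparkJob job = new ArffHeaderSparkJob();

      // Comma-separated attribute names to use when generating the header
      job.setAttributeNames("sepallength,sepalwidth,petallength,petalwidth,class");

      // Name of the header file to create in the output directory
      job.setOutputHeaderFileName("iris.header.arff");

      // The input CSV location, output directory, etc. would normally be
      // configured via the job's full option string, e.g.
      // job.setOptions(weka.core.Utils.splitOptions("...")); the flags are
      // job-specific and can be listed with listOptions().

      // Run against the supplied context and fetch the result
      if (job.runJobWithContext(context)) {
        Instances header = job.getHeader();
        System.out.println(header);
      }
    } finally {
      context.stop();
    }
  }
}
```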
public static final java.lang.String CHART_WIDTH_KEY
public static final java.lang.String CHART_HEIGHT_KEY
public static final int DEFAULT_CHART_WIDTH
public static final int DEFAULT_CHART_HEIGHT
public static final java.lang.String OUTPUT_SUBDIR
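The OUTPUT_SUBDIR constant names the subdirectory of the job's output directory that receives the ARFF header. As a rough, hypothetical illustration only (the exact output layout is not specified on this page, and outputDir is a placeholder), client code might locate the header file like this:

```java
import weka.distributed.spark.ArffHeaderSparkJob; // package name assumed

public class HeaderOutputPath {
  public static void main(String[] args) {
    // "outputDir" is a placeholder for whatever output directory the job was
    // configured with; the directory layout shown here is an assumption.
    String outputDir = "hdfs://namenode:8020/user/weka/output";
    ArffHeaderSparkJob job = new ArffHeaderSparkJob();
    String headerPath = outputDir + "/" + ArffHeaderSparkJob.OUTPUT_SUBDIR
      + "/" + job.getOutputHeaderFileName();
    System.out.println("Expecting the ARFF header at: " + headerPath);
  }
}
```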
public static void main(java.lang.String[] args)
Parameters:
args - arguments to the job

public java.util.Enumeration<Option> listOptions()
Specified by:
listOptions in interface OptionHandler
Overrides:
listOptions in class SparkJob
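listOptions() follows Weka's standard OptionHandler contract, so the option flags understood by the job can be discovered programmatically. A small sketch (the ArffHeaderSparkJob import assumes the weka.distributed.spark package):

```java
import java.util.Enumeration;

import weka.core.Option;
import weka.distributed.spark.ArffHeaderSparkJob; // package name assumed

public class ListJobOptions {
  public static void main(String[] args) {
    ArffHeaderSparkJob job = new ArffHeaderSparkJob();
    Enumeration<Option> opts = job.listOptions();
    while (opts.hasMoreElements()) {
      Option o = opts.nextElement();
      // synopsis() is the flag and its argument, description() the help text
      System.out.println(o.synopsis());
      System.out.println("\t" + o.description());
    }
  }
}
```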
public java.lang.String[] getJobOptionsOnly()
public java.lang.String[] getOptions()
Specified by:
getOptions in interface OptionHandler
Overrides:
getOptions in class SparkJob
public void setOptions(java.lang.String[] options) throws java.lang.Exception
Specified by:
setOptions in interface OptionHandler
Overrides:
setOptions in class SparkJob
Throws:
java.lang.Exception
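Because the job is an OptionHandler, a complete configuration can be applied from a single command-line style string via setOptions(String[]) and read back with getOptions(); getJobOptionsOnly() returns just the options specific to this job. A minimal sketch using weka.core.Utils (the empty option string is a placeholder, since the actual flags are job-specific and can be listed with listOptions()):

```java
import weka.core.Utils;
import weka.distributed.spark.ArffHeaderSparkJob; // package name assumed

public class OptionRoundTrip {
  public static void main(String[] args) throws Exception {
    ArffHeaderSparkJob job = new ArffHeaderSparkJob();

    // Apply options parsed from a single string (placeholder shown here)
    job.setOptions(Utils.splitOptions(""));

    // Read the configuration back: the complete option array...
    System.out.println(Utils.joinOptions(job.getOptions()));
    // ...and only the options specific to this job
    System.out.println(Utils.joinOptions(job.getJobOptionsOnly()));
  }
}
```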
public java.lang.String pathToExistingHeaderTipText()
public java.lang.String getPathToExistingHeader()
public void setPathToExistingHeader(java.lang.String path)
Parameters:
path - the path to a previously created header

public java.lang.String outputHeaderFileNameTipText()
public java.lang.String getOutputHeaderFileName()
public void setOutputHeaderFileName(java.lang.String name)
Parameters:
name - the name for the ARFF header file

public java.lang.String attributeNamesTipText()
public java.lang.String getAttributeNames()
public void setAttributeNames(java.lang.String names)
Parameters:
names - the names of the attributes

public java.lang.String attributeNamesFileTipText()
public java.lang.String getAttributeNamesFile()
public void setAttributeNamesFile(java.lang.String namesfile)
Parameters:
namesfile - the path to a names file to use

public java.lang.String csvToArffTaskOptionsTipText()
public java.lang.String getCsvToArffTaskOptions()
public void setCsvToArffTaskOptions(java.lang.String opts)
Parameters:
opts - options to pass on to the CSV to ARFF map and reduce tasks

public java.util.List<java.awt.image.BufferedImage> getSummaryCharts()
public java.util.List<java.lang.String> getSummaryChartAttNames()
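If the job was configured to produce summary charts, getSummaryCharts() and getSummaryChartAttNames() can be used after a successful run to write the images to disk. A hedged sketch using javax.imageio (it assumes the two lists are parallel, i.e. that the name at index i belongs to the chart at index i):

```java
import java.awt.image.BufferedImage;
import java.io.File;
import java.util.List;
import javax.imageio.ImageIO;

import weka.distributed.spark.ArffHeaderSparkJob; // package name assumed

public class SaveSummaryCharts {

  /** Writes each summary chart as a PNG into the given directory. */
  public static void save(ArffHeaderSparkJob job, File dir) throws Exception {
    List<BufferedImage> charts = job.getSummaryCharts();
    List<String> attNames = job.getSummaryChartAttNames();
    if (charts == null || charts.isEmpty()) {
      return; // no charts were generated for this run
    }
    for (int i = 0; i < charts.size(); i++) {
      // Assumption: chart i corresponds to attribute name i
      File out = new File(dir, attNames.get(i) + ".png");
      ImageIO.write(charts.get(i), "png", out);
    }
  }
}
```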
public boolean runJobWithContext(org.apache.spark.api.java.JavaSparkContext sparkContext) throws java.io.IOException, weka.distributed.DistributedWekaException
Description copied from class: SparkJob
Specified by:
runJobWithContext in class SparkJob
Parameters:
sparkContext - the context to use
Throws:
java.io.IOException - if an IO problem occurs
weka.distributed.DistributedWekaException - if any other problem occurs

public Instances getHeader()
public Instances getInstances()
Specified by:
getInstances in interface InstancesProducer
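Both getHeader() and getInstances() return the generated header as a standard weka.core.Instances object, so it can be inspected or saved with the usual Weka APIs. A brief sketch using ArffSaver from weka.core.converters:

```java
import java.io.File;

import weka.core.Instances;
import weka.core.converters.ArffSaver;
import weka.distributed.spark.ArffHeaderSparkJob; // package name assumed

public class InspectHeader {

  /** Prints the attribute names and writes the header to an ARFF file. */
  public static void writeHeader(ArffHeaderSparkJob job, File out)
    throws Exception {
    Instances header = job.getInstances(); // equivalent to getHeader()
    for (int i = 0; i < header.numAttributes(); i++) {
      System.out.println(header.attribute(i).name());
    }
    ArffSaver saver = new ArffSaver();
    saver.setInstances(header);
    saver.setFile(out);
    saver.writeBatch();
  }
}
```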
public void run(java.lang.Object toRun, java.lang.String[] options) throws java.lang.IllegalArgumentException
Specified by:
run in interface CommandlineRunnable
Overrides:
run in class distributed.core.DistributedJob
Throws:
java.lang.IllegalArgumentException
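run(Object, String[]) is the CommandlineRunnable entry point, which lets the job be launched from code with a command-line style argument array instead of going through main(). A hedged sketch, assuming the common Weka convention that the object to run is the job instance itself:

```java
import weka.distributed.spark.ArffHeaderSparkJob; // package name assumed

public class RunFromCode {
  public static void main(String[] args) {
    ArffHeaderSparkJob job = new ArffHeaderSparkJob();
    // args uses the same option flags accepted by setOptions()/main();
    // passing the job itself as toRun is an assumed (but common) convention.
    job.run(job, args);
  }
}
```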
public java.lang.String getText()
Specified by:
getText in interface TextProducer