public abstract class HadoopJob extends distributed.core.DistributedJob implements OptionHandler
Modifier and Type | Field and Description |
---|---|
static java.lang.String |
COLT_JAR
The path to the colt.jar
|
static java.lang.String |
DISTRIBUTED_WEKA_BASE_JAR
The path to the distributedWekaBase.jar
|
static java.lang.String |
DISTRIBUTED_WEKA_HADOOP_JAR
The path to the distributedWekaHadoop.jar
|
static java.lang.String |
JCOMMON_JAR
The path to the jcommon jar
|
static java.lang.String |
JFREECHART_JAR
The path to the jfreechart jar
|
static java.lang.String |
LA4J_JAR
The path to the la4j.jar
|
static java.lang.String |
OPEN_CSV_JAR
The path to the opencsv.jar
|
static java.lang.String |
TDIGEST_JAR
The path to the t-digest.jar
|
Constructor and Description |
---|
HadoopJob(java.lang.String jobName,
java.lang.String jobDescription)
Constructor for a HadoopJob
|
Modifier and Type | Method and Description |
---|---|
java.lang.String |
additionalWekaPackagesTipText()
Tip text for this property.
|
void |
cleanOutputDirectory(org.apache.hadoop.mapreduce.Job job)
Deletes the output directory for a job
|
java.lang.String |
deubgTipText()
Tip text for this property
|
java.lang.String |
getAdditionalWekaPackages()
Get a comma separated list of the names of additional weka packages to use
with the job.
|
java.lang.String[] |
getBaseOptionsOnly()
Return the base options only (not the subclasses options or the options
specific to the configuration)
|
boolean |
getDebug()
Get whether to output debug info.
|
java.lang.String |
getLoggingInterval()
Get the interval between output of logging information from running jobs.
|
static int |
getMapNumber(java.lang.String taskID)
Get the number of the map attempt from the supplied task ID string
|
MapReduceJobConfig |
getMapReduceJobConfig()
Get the main configuration to use with this job
|
static int |
getMapReduceNumber(java.lang.String taskID,
java.lang.String prefix)
Extract the number of a map/reduce attempt from the supplied taskID string.
|
java.lang.String[] |
getOptions() |
java.lang.String |
getPathToWekaJar()
Get the path to the weka.jar file.
|
static int |
getReduceNumber(java.lang.String taskID)
Get the number of the reduce attempt from the supplied task ID string
|
java.util.Enumeration<Option> |
listOptions() |
java.lang.String |
loggingIntervalTipText()
Tip text for this property
|
void |
logMessage(java.lang.String message)
Log a message
|
java.lang.String |
pathToWekaJarTipText()
Tip text for this property
|
void |
setAdditionalWekaPackages(java.lang.String packages)
Set a comma separated list of the names of additional weka packages to use
with the job.
|
void |
setDebug(boolean debug)
Set whether to output debug info.
|
void |
setLoggingInterval(java.lang.String li)
Set the interval between output of logging information from running jobs.
|
void |
setMapReduceJobConfig(MapReduceJobConfig conf)
Set the main configuration to use with this job
|
void |
setOptions(java.lang.String[] options) |
void |
setPathToWekaJar(java.lang.String path)
Set the path to the weka.jar file.
|
environmentSubstitute, getAdditionalWekaPackageNames, getJobName, getJobStatus, getLog, logMessage, logMessage, makeOptionsStr, objectRowToInstance, parseInstance, postExecution, preExecution, run, runJob, setEnvironment, setJobDescription, setJobName, setJobStatus, setLog, setStatusMessagePrefix, stackTraceToString, statusMessage, stopJob
public static final java.lang.String DISTRIBUTED_WEKA_HADOOP_JAR
public static final java.lang.String DISTRIBUTED_WEKA_BASE_JAR
public static final java.lang.String OPEN_CSV_JAR
public static final java.lang.String JFREECHART_JAR
public static final java.lang.String JCOMMON_JAR
public static final java.lang.String COLT_JAR
public static final java.lang.String LA4J_JAR
public static final java.lang.String TDIGEST_JAR
public HadoopJob(java.lang.String jobName, java.lang.String jobDescription)
jobName
- the name of the job
jobDescription
- a short description of the job
public static int getMapReduceNumber(java.lang.String taskID, java.lang.String prefix)
taskID
- the taskID string
prefix
- the prefix identifying the type of task (i.e. mapper or
reducer)
public static int getMapNumber(java.lang.String taskID)
taskID
- the task ID string
public static int getReduceNumber(java.lang.String taskID)
taskID
- the task ID string
public java.util.Enumeration<Option> listOptions()
listOptions
in interface OptionHandler
public java.lang.String[] getBaseOptionsOnly()
public java.lang.String[] getOptions()
getOptions
in interface OptionHandler
public void setOptions(java.lang.String[] options) throws java.lang.Exception
setOptions
in interface OptionHandler
java.lang.Exception
public MapReduceJobConfig getMapReduceJobConfig()
public void setMapReduceJobConfig(MapReduceJobConfig conf)
conf
- the main configuration to use with this job
public java.lang.String deubgTipText()
public boolean getDebug()
public void setDebug(boolean debug)
debug
- true if debug info is to be output
public java.lang.String pathToWekaJarTipText()
public java.lang.String getPathToWekaJar()
public void setPathToWekaJar(java.lang.String path)
path
- the path to the weka.jar.
public java.lang.String additionalWekaPackagesTipText()
public java.lang.String getAdditionalWekaPackages()
public void setAdditionalWekaPackages(java.lang.String packages)
packages
- a comma separated list of weka packages to use with the job
public java.lang.String loggingIntervalTipText()
public java.lang.String getLoggingInterval()
public void setLoggingInterval(java.lang.String li)
li
- the interval (in seconds) between output of logging information
public void cleanOutputDirectory(org.apache.hadoop.mapreduce.Job job) throws java.io.IOException
job
- the Job object to delete the output directory for
java.io.IOException
- if a problem occurs
public void logMessage(java.lang.String message)
logMessage
in class distributed.core.DistributedJob
message
- the message to log