public class MapReduceJobConfig extends AbstractHadoopJobConfig implements OptionHandler
Modifier and Type | Field and Description |
---|---|
static java.lang.String |
COMBINER_CLASS
Internal key for the name of the combiner class
|
static java.lang.String |
HADOOP_JOB_TRACKER_HOST
Internal key for the Hadoop property for the job tracker host
|
static java.lang.String |
HADOOP_MAPRED_MAX_SPLIT_SIZE
Internal key for the Hadoop 1 property for the maximum block size
|
static java.lang.String |
HADOOP_TASKTRACKER_REDUCE_TASKS_MAXIMUM
Internal key for the Hadoop 1 property for the maximum number of
reducers to run per node
|
static java.lang.String |
HADOOP2_MAPRED_MAX_SPLIT_SIZE
Internal key for the Hadoop 2 property for the maximum block size
|
static java.lang.String |
HADOOP2_TASKTRACKER_REDUCE_TASKS_MAXIMUM
Internal key for the Hadoop 2 property for the maximum number of
reducers to run per node
|
static java.lang.String |
INPUT_FORMAT_CLASS
Internal key for the name of the input format class to use
|
static java.lang.String |
INPUT_PATHS
Internal key for the input path(s) to use for the job
|
static java.lang.String |
MAP_OUTPUT_KEY_CLASS
Internal key for the name of the map output key class to use
|
static java.lang.String |
MAP_OUTPUT_VALUE_CLASS
Internal key for the name of the map output value class to use
|
static java.lang.String |
MAPPER_CLASS
Internal key for the name of the mapper class
|
static java.lang.String |
MAPRED_MAX_SPLIT_SIZE
Internal key for the maximum block size (for splitting data) to use
|
static java.lang.String |
NUM_MAPPERS
Internal key for the number of mappers to use
|
static java.lang.String |
NUM_REDUCERS
Internal key for the number of reducers to use
|
static java.lang.String |
OUTPUT_FORMAT_CLASS
Internal key for the name of the output format class to use
|
static java.lang.String |
OUTPUT_KEY_CLASS
Internal key for the name of the (job/reducer) output key to use
|
static java.lang.String |
OUTPUT_PATH
Internal key for the output path to use for the job
|
static java.lang.String |
OUTPUT_VALUE_CLASS
Internal key for the name of the (job/reducer) output value to use
|
static java.lang.String |
REDUCER_CLASS
Internal key for the name of the reducer class
|
static java.lang.String |
TASK_TRACKER_MAP_MAXIMUM
Internal key for the maximum number of mappers that will run on a node
concurrently
|
static java.lang.String |
YARN_RESOURCE_MANAGER_ADDRESS
Internal key for the Hadoop property for the yarn resource manager address
|
static java.lang.String |
YARN_RESOURCE_MANAGER_SCHEDULER_ADDRESS
Internal key for the Hadoop property for the yarn resource manager
scheduler address.
|
DEFAULT_HOST, DEFAULT_PORT, DEFAULT_PORT_YARN, JOBTRACKER_HOST, JOBTRACKER_PORT
Constructor and Description |
---|
MapReduceJobConfig()
Constructor - sets defaults
|
Modifier and Type | Method and Description |
---|---|
org.apache.hadoop.mapreduce.Job |
configureForHadoop(java.lang.String jobName,
org.apache.hadoop.conf.Configuration conf,
Environment env)
Apply the settings encapsulated in this config and return a Job object
ready for execution.
|
void |
deleteOutputDirectory(org.apache.hadoop.mapreduce.Job job,
Environment env)
Clean the output directory specified for the supplied job
|
java.lang.String |
getCombinerClass()
Get the name of the combiner class (if any) to use.
|
HDFSConfig |
getHDFSConfig()
Get the HDFSConfig to use
|
java.lang.String |
getHDFSHost()
Get the HDFS host (name node)
|
java.lang.String |
getHDFSPort()
Get the HDFS port
|
java.lang.String |
getInputFormatClass()
Get the name of the input format class to use.
|
java.lang.String |
getInputPaths()
Get the input path(s) to use
|
java.lang.String |
getMapOutputKeyClass()
Get the name of the map output key class to use.
|
java.lang.String |
getMapOutputValueClass()
Get the name of the map output value class to use.
|
java.lang.String |
getMapperClass()
Get the mapper class name to use.
|
java.lang.String |
getMapredMaxSplitSize()
Get the maximum split size (in bytes).
|
java.lang.String |
getNumberOfMaps()
Get the number of maps to use.
|
java.lang.String |
getNumberOfReducers()
Get the number of reducers to use.
|
java.lang.String[] |
getOptions() |
java.lang.String |
getOutputFormatClass()
Get the name of the output format class to use.
|
java.lang.String |
getOutputKeyClass()
Get the name of the (reducer) output key class to use
|
java.lang.String |
getOutputPath()
Get the output path to use
|
java.lang.String |
getOutputValueClass()
Get the name of the (reducer) output value class to use
|
java.lang.String |
getReducerClass()
Get the name of the reducer class to use.
|
java.lang.String |
getTaskTrackerMapTasksMaximum()
Get the maximum number of map tasks to run concurrently by a task tracker
(node).
|
java.lang.String |
HDFSHostTipText()
Get the tool tip text for this property
|
java.lang.String |
HDFSPortTipText()
Get the tool tip text for this property
|
java.lang.String |
inputPathsTipText()
Get the tip text for this property
|
java.util.Enumeration<Option> |
listOptions() |
java.lang.String |
numberOfMapsTipText()
Get the tool tip text for this property
|
java.lang.String |
numberOfReducersTipText()
Get the tool tip text for this property
|
java.lang.String |
outputPathTipText()
Get the tip text for this property
|
void |
setCombinerClass(java.lang.String combinerClass)
Set the name of the combiner class (if any) to use.
|
void |
setHDFSConfig(HDFSConfig config)
Set the HDFSConfig to use
|
void |
setHDFSHost(java.lang.String host)
Set the HDFSHost (name node)
|
void |
setHDFSPort(java.lang.String port)
Set the HDFS port
|
void |
setInputFormatClass(java.lang.String inputFormatClass)
Set the name of the input format class to use.
|
void |
setInputPaths(java.lang.String inputPaths)
Set the input path(s) to use
|
void |
setMapOutputKeyClass(java.lang.String mapOutputKeyClass)
Set the name of the map output key class to use.
|
void |
setMapOutputValueClass(java.lang.String mapOutputValueClass)
Set the name of the map output value class to use.
|
void |
setMapperClass(java.lang.String mapperClass)
Set the mapper class name to use.
|
void |
setMapredMaxSplitSize(java.lang.String maxSize)
Set the maximum split size (in bytes).
|
void |
setNumberOfMaps(java.lang.String nM)
Set the number of maps to use.
|
void |
setNumberOfReducers(java.lang.String nR)
Set the number of reducers to use.
|
void |
setOptions(java.lang.String[] options) |
void |
setOutputFormatClass(java.lang.String outputFormatClass)
Set the name of the output format class to use.
|
void |
setOutputKeyClass(java.lang.String outputKeyClass)
Set the name of the (reducer) output key class to use
|
void |
setOutputPath(java.lang.String outputPath)
Set the output path to use
|
void |
setOutputValueClass(java.lang.String outputValueClass)
Set the name of the (reducer) output value class to use
|
void |
setReducerClass(java.lang.String reducerClass)
Set the name of the reducer class to use.
|
void |
setTaskTrackerMapTasksMaximum(java.lang.String mmt)
Set the maximum number of map tasks to run concurrently by a task tracker
(node).
|
java.lang.String |
taskTrackerMapTasksMaximumTipText()
Get the tool tip text for this property
|
getJobTrackerHost, getJobTrackerPort, isHadoop2, jobTrackerHostTipText, jobTrackerPortTipText, setJobTrackerHost, setJobTrackerPort
public static final java.lang.String NUM_MAPPERS
public static final java.lang.String NUM_REDUCERS
public static final java.lang.String TASK_TRACKER_MAP_MAXIMUM
public static final java.lang.String MAPPER_CLASS
public static final java.lang.String REDUCER_CLASS
public static final java.lang.String COMBINER_CLASS
public static final java.lang.String INPUT_FORMAT_CLASS
public static final java.lang.String OUTPUT_FORMAT_CLASS
public static final java.lang.String MAP_OUTPUT_KEY_CLASS
public static final java.lang.String MAP_OUTPUT_VALUE_CLASS
public static final java.lang.String OUTPUT_KEY_CLASS
public static final java.lang.String OUTPUT_VALUE_CLASS
public static final java.lang.String INPUT_PATHS
public static final java.lang.String OUTPUT_PATH
public static final java.lang.String MAPRED_MAX_SPLIT_SIZE
public static final java.lang.String HADOOP_JOB_TRACKER_HOST
public static final java.lang.String YARN_RESOURCE_MANAGER_ADDRESS
public static final java.lang.String YARN_RESOURCE_MANAGER_SCHEDULER_ADDRESS
public static final java.lang.String HADOOP_MAPRED_MAX_SPLIT_SIZE
public static final java.lang.String HADOOP2_MAPRED_MAX_SPLIT_SIZE
public static final java.lang.String HADOOP_TASKTRACKER_REDUCE_TASKS_MAXIMUM
public static final java.lang.String HADOOP2_TASKTRACKER_REDUCE_TASKS_MAXIMUM
public java.util.Enumeration<Option> listOptions()
listOptions
in interface OptionHandler
listOptions
in class distributed.core.DistributedJobConfig
public void setOptions(java.lang.String[] options) throws java.lang.Exception
setOptions
in interface OptionHandler
setOptions
in class distributed.core.DistributedJobConfig
java.lang.Exception
public java.lang.String[] getOptions()
getOptions
in interface OptionHandler
getOptions
in class distributed.core.DistributedJobConfig
public void setHDFSConfig(HDFSConfig config)
config
- the HDFSConfig to use
public HDFSConfig getHDFSConfig()
public java.lang.String HDFSHostTipText()
public void setHDFSHost(java.lang.String host)
host
- the HDFS host
public java.lang.String getHDFSHost()
public java.lang.String HDFSPortTipText()
public void setHDFSPort(java.lang.String port)
port
- the HDFS port
public java.lang.String getHDFSPort()
public java.lang.String numberOfMapsTipText()
public void setNumberOfMaps(java.lang.String nM)
nM
- the number of maps to use
public java.lang.String getNumberOfMaps()
public java.lang.String taskTrackerMapTasksMaximumTipText()
public void setTaskTrackerMapTasksMaximum(java.lang.String mmt)
mmt
- the maximum number of map tasks to run concurrently by a task
tracker
public java.lang.String getTaskTrackerMapTasksMaximum()
public java.lang.String numberOfReducersTipText()
public void setNumberOfReducers(java.lang.String nR)
nR
- the number of reducers to use.
public java.lang.String getNumberOfReducers()
public void setMapperClass(java.lang.String mapperClass)
mapperClass
- the mapper class name
public java.lang.String getMapperClass()
public void setReducerClass(java.lang.String reducerClass)
reducerClass
- the name of the reducer class
public java.lang.String getReducerClass()
public void setCombinerClass(java.lang.String combinerClass)
combinerClass
- the name of the combiner class to use
public java.lang.String getCombinerClass()
public void setInputFormatClass(java.lang.String inputFormatClass)
inputFormatClass
- the name of the input format class to use
public java.lang.String getInputFormatClass()
public void setOutputFormatClass(java.lang.String outputFormatClass)
outputFormatClass
- the name of the output format class to use.
public java.lang.String getOutputFormatClass()
public void setMapOutputKeyClass(java.lang.String mapOutputKeyClass)
mapOutputKeyClass
- the name of the map output key class
public java.lang.String getMapOutputKeyClass()
public void setMapOutputValueClass(java.lang.String mapOutputValueClass)
mapOutputValueClass
- the name of the map output value class
public java.lang.String getMapOutputValueClass()
public void setOutputKeyClass(java.lang.String outputKeyClass)
outputKeyClass
- the name of the output key class to use
public java.lang.String getOutputKeyClass()
public void setOutputValueClass(java.lang.String outputValueClass)
outputValueClass
- the name of the output value class to use
public java.lang.String getOutputValueClass()
public java.lang.String inputPathsTipText()
public void setInputPaths(java.lang.String inputPaths)
inputPaths
- the input paths to use
public java.lang.String getInputPaths()
public java.lang.String outputPathTipText()
public void setOutputPath(java.lang.String outputPath)
outputPath
- the output path to use
public java.lang.String getOutputPath()
public void setMapredMaxSplitSize(java.lang.String maxSize)
maxSize
- the maximum split size (in bytes)
public java.lang.String getMapredMaxSplitSize()
public org.apache.hadoop.mapreduce.Job configureForHadoop(java.lang.String jobName, org.apache.hadoop.conf.Configuration conf, Environment env) throws java.io.IOException, java.lang.ClassNotFoundException
jobName
- the name of the job
conf
- the Configuration object that will be wrapped in the Job
env
- environment variables
java.io.IOException
- if a problem occurs
java.lang.ClassNotFoundException
- if various classes are not found
public void deleteOutputDirectory(org.apache.hadoop.mapreduce.Job job, Environment env) throws java.io.IOException
job
- the job to clean the output directory for
env
- environment variables
java.io.IOException
- if a problem occurs