org.apache.hadoop.mapred
Class JobConf

java.lang.Object
  extended by org.apache.hadoop.conf.Configuration
      extended by org.apache.hadoop.mapred.JobConf

public class JobConf
extends Configuration

A map/reduce job configuration. This names the Mapper, combiner (if any), Partitioner, Reducer, InputFormat, and OutputFormat implementations to be used. It also indicates the set of input files, and where the output files should be written.


Constructor Summary
JobConf()
          Construct a map/reduce job configuration.
JobConf(Class exampleClass)
          Construct a map/reduce job configuration.
JobConf(Configuration conf)
          Construct a map/reduce job configuration.
JobConf(Configuration conf, Class exampleClass)
          Construct a map/reduce job configuration.
JobConf(Path config)
          Construct a map/reduce configuration.
JobConf(String config)
          Construct a map/reduce configuration.
 
Method Summary
 void addInputPath(Path dir)
           
 void deleteLocalFiles()
           
 void deleteLocalFiles(String subdir)
           
 Class<? extends Reducer> getCombinerClass()
           
 boolean getCompressMapOutput()
          Should the outputs of the maps be compressed?
 InputFormat getInputFormat()
           
 Class getInputKeyClass()
          Deprecated. Call RecordReader.createKey().
 Path[] getInputPaths()
           
 Class getInputValueClass()
          Deprecated. Call RecordReader.createValue().
 String getJar()
           
 String getJobName()
          Get the user-specified job name.
 boolean getKeepFailedTaskFiles()
          Should the temporary files for failed tasks be kept?
 String getKeepTaskFilesPattern()
          Get the regular expression that is matched against the task names to see if we need to keep the files.
 String[] getLocalDirs()
           
 Path getLocalPath(String pathString)
          Constructs a local file name.
 SequenceFile.CompressionType getMapOutputCompressionType()
          Get the compression type for the map outputs.
 Class<? extends CompressionCodec> getMapOutputCompressorClass(Class<? extends CompressionCodec> defaultValue)
          Get the codec for compressing the map outputs
 Class<? extends WritableComparable> getMapOutputKeyClass()
          Get the key class for the map output data.
 Class<? extends Writable> getMapOutputValueClass()
          Get the value class for the map output data.
 Class<? extends Mapper> getMapperClass()
           
 Class<? extends MapRunnable> getMapRunnerClass()
           
 int getMaxMapAttempts()
          Get the configured number of maximum attempts that will be made to run a map task, as specified by the mapred.map.max.attempts property.
 int getMaxMapTaskFailuresPercent()
          Get the maximum percentage of map tasks that can fail without the job being aborted.
 int getMaxReduceAttempts()
          Get the configured number of maximum attempts that will be made to run a reduce task, as specified by the mapred.reduce.max.attempts property.
 int getMaxReduceTaskFailuresPercent()
          Get the maximum percentage of reduce tasks that can fail without the job being aborted.
 int getMaxTaskFailuresPerTracker()
          Get the maximum number of failures of a given job per tasktracker.
 int getNumMapTasks()
           
 int getNumReduceTasks()
           
 OutputFormat getOutputFormat()
           
 Class<? extends WritableComparable> getOutputKeyClass()
           
 WritableComparator getOutputKeyComparator()
           
 Path getOutputPath()
           
 Class<? extends Writable> getOutputValueClass()
           
 WritableComparator getOutputValueGroupingComparator()
          Get the user defined comparator for grouping values.
 Class<? extends Partitioner> getPartitionerClass()
           
 Class<? extends Reducer> getReducerClass()
           
 boolean getSpeculativeExecution()
          Should speculative execution be used for this job?
 Path getSystemDir()
           
 String getUser()
          Get the reported username for this job.
 Path getWorkingDirectory()
          Get the current working directory for the default file system.
 void setCombinerClass(Class<? extends Reducer> theClass)
           
 void setCompressMapOutput(boolean compress)
          Should the map outputs be compressed before transfer? Uses the SequenceFile compression.
 void setInputFormat(Class<? extends InputFormat> theClass)
           
 void setInputKeyClass(Class theClass)
          Deprecated. Not used
 void setInputPath(Path dir)
           
 void setInputValueClass(Class theClass)
          Deprecated. Not used
 void setJar(String jar)
           
 void setJarByClass(Class cls)
          Set the job's jar file by finding an example class location.
 void setJobName(String name)
          Set the user-specified job name.
 void setKeepFailedTaskFiles(boolean keep)
          Set whether the framework should keep the intermediate files for failed tasks.
 void setKeepTaskFilesPattern(String pattern)
          Set a regular expression for task names that should be kept.
 void setMapOutputCompressionType(SequenceFile.CompressionType style)
          Set the compression type for the map outputs.
 void setMapOutputCompressorClass(Class<? extends CompressionCodec> codecClass)
          Set the given class as the compression codec for the map outputs.
 void setMapOutputKeyClass(Class<? extends WritableComparable> theClass)
          Set the key class for the map output data.
 void setMapOutputValueClass(Class<? extends Writable> theClass)
          Set the value class for the map output data.
 void setMapperClass(Class<? extends Mapper> theClass)
           
 void setMapRunnerClass(Class<? extends MapRunnable> theClass)
           
 void setMaxMapAttempts(int n)
          Expert: Set the number of maximum attempts that will be made to run a map task
 void setMaxMapTaskFailuresPercent(int percent)
          Set the maximum percentage of map tasks that can fail without the job being aborted.
 void setMaxReduceAttempts(int n)
          Expert: Set the number of maximum attempts that will be made to run a reduce task
 void setMaxReduceTaskFailuresPercent(int percent)
          Set the maximum percentage of reduce tasks that can fail without the job being aborted.
 void setMaxTaskFailuresPerTracker(int noFailures)
          Set the maximum number of failures of a given job per tasktracker.
 void setNumMapTasks(int n)
           
 void setNumReduceTasks(int n)
           
 void setOutputFormat(Class<? extends OutputFormat> theClass)
           
 void setOutputKeyClass(Class<? extends WritableComparable> theClass)
           
 void setOutputKeyComparatorClass(Class<? extends WritableComparator> theClass)
           
 void setOutputPath(Path dir)
           
 void setOutputValueClass(Class<? extends Writable> theClass)
           
 void setOutputValueGroupingComparator(Class theClass)
          Set the user defined comparator for grouping values.
 void setPartitionerClass(Class<? extends Partitioner> theClass)
           
 void setReducerClass(Class<? extends Reducer> theClass)
           
 void setSpeculativeExecution(boolean new_val)
          Turn on or off speculative execution for this job.
 void setUser(String user)
          Set the reported username for this job.
 void setWorkingDirectory(Path dir)
          Set the current working directory for the default file system
 
Methods inherited from class org.apache.hadoop.conf.Configuration
addDefaultResource, addDefaultResource, addDefaultResource, addFinalResource, addFinalResource, addFinalResource, entries, get, get, get, getBoolean, getClass, getClass, getClassByName, getClassLoader, getConfResourceAsInputStream, getConfResourceAsReader, getFile, getFloat, getInt, getLocalPath, getLong, getObject, getResource, getStrings, main, set, setBoolean, setClass, setClassLoader, setInt, setLong, setObject, setQuietMode, toString, write
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait
 

Constructor Detail

JobConf

public JobConf()
Construct a map/reduce job configuration.


JobConf

public JobConf(Class exampleClass)
Construct a map/reduce job configuration.

Parameters:
exampleClass - a class whose containing jar is used as the job's jar.

JobConf

public JobConf(Configuration conf)
Construct a map/reduce job configuration.

Parameters:
conf - a Configuration whose settings will be inherited.

JobConf

public JobConf(Configuration conf,
               Class exampleClass)
Construct a map/reduce job configuration.

Parameters:
conf - a Configuration whose settings will be inherited.
exampleClass - a class whose containing jar is used as the job's jar.

JobConf

public JobConf(String config)
Construct a map/reduce configuration.

Parameters:
config - a Configuration-format XML job description file

JobConf

public JobConf(Path config)
Construct a map/reduce configuration.

Parameters:
config - a Configuration-format XML job description file
Method Detail

getJar

public String getJar()

setJar

public void setJar(String jar)

setJarByClass

public void setJarByClass(Class cls)
Set the job's jar file by finding an example class location.

Parameters:
cls - the example class

getSystemDir

public Path getSystemDir()

getLocalDirs

public String[] getLocalDirs()
                      throws IOException
Throws:
IOException

deleteLocalFiles

public void deleteLocalFiles()
                      throws IOException
Throws:
IOException

deleteLocalFiles

public void deleteLocalFiles(String subdir)
                      throws IOException
Throws:
IOException

getLocalPath

public Path getLocalPath(String pathString)
                  throws IOException
Constructs a local file name. Files are distributed among configured local directories.

Throws:
IOException

setInputPath

public void setInputPath(Path dir)

addInputPath

public void addInputPath(Path dir)

getInputPaths

public Path[] getInputPaths()

getUser

public String getUser()
Get the reported username for this job.

Returns:
the username

setUser

public void setUser(String user)
Set the reported username for this job.

Parameters:
user - the username

setKeepFailedTaskFiles

public void setKeepFailedTaskFiles(boolean keep)
Set whether the framework should keep the intermediate files for failed tasks.


getKeepFailedTaskFiles

public boolean getKeepFailedTaskFiles()
Should the temporary files for failed tasks be kept?

Returns:
should the files be kept?

setKeepTaskFilesPattern

public void setKeepTaskFilesPattern(String pattern)
Set a regular expression for task names that should be kept. The regular expression ".*_m_000123_0" would keep the files for the first instance of map 123 that ran.

Parameters:
pattern - the java.util.regex.Pattern to match against the task names.

getKeepTaskFilesPattern

public String getKeepTaskFilesPattern()
Get the regular expression that is matched against the task names to see if we need to keep the files.

Returns:
the pattern as a string, if it was set, otherwise null

setWorkingDirectory

public void setWorkingDirectory(Path dir)
Set the current working directory for the default file system

Parameters:
dir - the new current working directory

getWorkingDirectory

public Path getWorkingDirectory()
Get the current working directory for the default file system.

Returns:
the directory name

getOutputPath

public Path getOutputPath()

setOutputPath

public void setOutputPath(Path dir)

getInputFormat

public InputFormat getInputFormat()

setInputFormat

public void setInputFormat(Class<? extends InputFormat> theClass)

getOutputFormat

public OutputFormat getOutputFormat()

setOutputFormat

public void setOutputFormat(Class<? extends OutputFormat> theClass)

getInputKeyClass

public Class getInputKeyClass()
Deprecated. Call RecordReader.createKey().


setInputKeyClass

public void setInputKeyClass(Class theClass)
Deprecated. Not used


getInputValueClass

public Class getInputValueClass()
Deprecated. Call RecordReader.createValue().


setInputValueClass

public void setInputValueClass(Class theClass)
Deprecated. Not used


setCompressMapOutput

public void setCompressMapOutput(boolean compress)
Should the map outputs be compressed before transfer? Uses the SequenceFile compression.


getCompressMapOutput

public boolean getCompressMapOutput()
Should the outputs of the maps be compressed?

Returns:
are they compressed?

setMapOutputCompressionType

public void setMapOutputCompressionType(SequenceFile.CompressionType style)
Set the compression type for the map outputs.

Parameters:
style - NONE, RECORD, or BLOCK to control how the map outputs are compressed

getMapOutputCompressionType

public SequenceFile.CompressionType getMapOutputCompressionType()
Get the compression type for the map outputs.

Returns:
the compression type, defaulting to job output compression type

setMapOutputCompressorClass

public void setMapOutputCompressorClass(Class<? extends CompressionCodec> codecClass)
Set the given class as the compression codec for the map outputs.

Parameters:
codecClass - the CompressionCodec class that will compress the map outputs

getMapOutputCompressorClass

public Class<? extends CompressionCodec> getMapOutputCompressorClass(Class<? extends CompressionCodec> defaultValue)
Get the codec for compressing the map outputs

Parameters:
defaultValue - the value to return if it is not set
Returns:
the CompressionCodec class that should be used to compress the map outputs
Throws:
IllegalArgumentException - if the class was specified, but not found

getMapOutputKeyClass

public Class<? extends WritableComparable> getMapOutputKeyClass()
Get the key class for the map output data. If it is not set, use the (final) output key class. This allows the map output key class to be different than the final output key class.

Returns:
map output key class

setMapOutputKeyClass

public void setMapOutputKeyClass(Class<? extends WritableComparable> theClass)
Set the key class for the map output data. This allows the user to specify the map output key class to be different than the final output key class.


getMapOutputValueClass

public Class<? extends Writable> getMapOutputValueClass()
Get the value class for the map output data. If it is not set, use the (final) output value class. This allows the map output value class to be different than the final output value class.

Returns:
map output value class

setMapOutputValueClass

public void setMapOutputValueClass(Class<? extends Writable> theClass)
Set the value class for the map output data. This allows the user to specify the map output value class to be different than the final output value class


getOutputKeyClass

public Class<? extends WritableComparable> getOutputKeyClass()

setOutputKeyClass

public void setOutputKeyClass(Class<? extends WritableComparable> theClass)

getOutputKeyComparator

public WritableComparator getOutputKeyComparator()

setOutputKeyComparatorClass

public void setOutputKeyComparatorClass(Class<? extends WritableComparator> theClass)

getOutputValueGroupingComparator

public WritableComparator getOutputValueGroupingComparator()
Get the user defined comparator for grouping values. This call is used to get the comparator for grouping values by key.

Returns:
Comparator set by the user for grouping values.
See Also:
setOutputValueGroupingComparator(Class) for details.

setOutputValueGroupingComparator

public void setOutputValueGroupingComparator(Class theClass)
Set the user defined comparator for grouping values. For key-value pairs (K1,V1) and (K2,V2), the values (V1, V2) are passed in a single call to the reduce function if K1 and K2 compare as equal. This comparator should be provided if the equivalence rules for keys for sorting the intermediates are different from those for grouping values.

Parameters:
theClass - The Comparator class to be used for grouping. It should extend WritableComparator.

getOutputValueClass

public Class<? extends Writable> getOutputValueClass()

setOutputValueClass

public void setOutputValueClass(Class<? extends Writable> theClass)

getMapperClass

public Class<? extends Mapper> getMapperClass()

setMapperClass

public void setMapperClass(Class<? extends Mapper> theClass)

getMapRunnerClass

public Class<? extends MapRunnable> getMapRunnerClass()

setMapRunnerClass

public void setMapRunnerClass(Class<? extends MapRunnable> theClass)

getPartitionerClass

public Class<? extends Partitioner> getPartitionerClass()

setPartitionerClass

public void setPartitionerClass(Class<? extends Partitioner> theClass)

getReducerClass

public Class<? extends Reducer> getReducerClass()

setReducerClass

public void setReducerClass(Class<? extends Reducer> theClass)

getCombinerClass

public Class<? extends Reducer> getCombinerClass()

setCombinerClass

public void setCombinerClass(Class<? extends Reducer> theClass)

getSpeculativeExecution

public boolean getSpeculativeExecution()
Should speculative execution be used for this job?

Returns:
Defaults to true

setSpeculativeExecution

public void setSpeculativeExecution(boolean new_val)
Turn on or off speculative execution for this job. In general, it should be turned off for map jobs that have side effects.


getNumMapTasks

public int getNumMapTasks()

setNumMapTasks

public void setNumMapTasks(int n)

getNumReduceTasks

public int getNumReduceTasks()

setNumReduceTasks

public void setNumReduceTasks(int n)

getMaxMapAttempts

public int getMaxMapAttempts()
Get the configured number of maximum attempts that will be made to run a map task, as specified by the mapred.map.max.attempts property. If this property is not already set, the default is 4 attempts

Returns:
the max number of attempts

setMaxMapAttempts

public void setMaxMapAttempts(int n)
Expert: Set the number of maximum attempts that will be made to run a map task

Parameters:
n - the number of attempts

getMaxReduceAttempts

public int getMaxReduceAttempts()
Get the configured number of maximum attempts that will be made to run a reduce task, as specified by the mapred.reduce.max.attempts property. If this property is not already set, the default is 4 attempts

Returns:
the max number of attempts

setMaxReduceAttempts

public void setMaxReduceAttempts(int n)
Expert: Set the number of maximum attempts that will be made to run a reduce task

Parameters:
n - the number of attempts

getJobName

public String getJobName()
Get the user-specified job name. This is only used to identify the job to the user.

Returns:
the job's name, defaulting to ""

setJobName

public void setJobName(String name)
Set the user-specified job name.

Parameters:
name - the job's new name

setMaxTaskFailuresPerTracker

public void setMaxTaskFailuresPerTracker(int noFailures)
Set the maximum no. of failures of a given job per tasktracker.

Parameters:
noFailures - maximum no. of failures of a given job per tasktracker.

getMaxTaskFailuresPerTracker

public int getMaxTaskFailuresPerTracker()
Get the maximum no. of failures of a given job per tasktracker.

Returns:
the maximum no. of failures of a given job per tasktracker.

getMaxMapTaskFailuresPercent

public int getMaxMapTaskFailuresPercent()
Get the maximum percentage of map tasks that can fail without the job being aborted.

Returns:
the maximum percentage of map tasks that can fail without the job being aborted

setMaxMapTaskFailuresPercent

public void setMaxMapTaskFailuresPercent(int percent)
Set the maximum percentage of map tasks that can fail without the job being aborted.

Parameters:
percent - the maximum percentage of map tasks that can fail without the job being aborted

getMaxReduceTaskFailuresPercent

public int getMaxReduceTaskFailuresPercent()
Get the maximum percentage of reduce tasks that can fail without the job being aborted.

Returns:
the maximum percentage of reduce tasks that can fail without the job being aborted

setMaxReduceTaskFailuresPercent

public void setMaxReduceTaskFailuresPercent(int percent)
Set the maximum percentage of reduce tasks that can fail without the job being aborted.

Parameters:
percent - the maximum percentage of reduce tasks that can fail without the job being aborted


Copyright © 2006 The Apache Software Foundation