org.apache.hadoop.mapred
Class JobTracker

java.lang.Object
  extended by org.apache.hadoop.mapred.JobTracker

public class JobTracker
extends Object

JobTracker is the central location for submitting and tracking MR jobs in a network environment.


Nested Class Summary
static class JobTracker.IllegalStateException
          A client tried to submit a job before the Job Tracker was ready.
static class JobTracker.State
           
 
Field Summary
static int CLUSTER_INCREMENT
           
static long COUNTER_UPDATE_INTERVAL
           
static int FILE_NOT_FOUND
           
static int HEARTBEAT_INTERVAL_MIN
           
static org.apache.commons.logging.Log LOG
           
static String MAP_OUTPUT_LENGTH
          The custom http header used for the map output length.
static float MAX_INMEM_FILESIZE_FRACTION
          Constant denoting the max size (in terms of the fraction of the total size of the filesys) of a map output file that we will try to keep in mem.
static float MAX_INMEM_FILESYS_USE
          Constant denoting when a merge of in memory files will be triggered
static String RAW_MAP_OUTPUT_LENGTH
          The custom http header used for the "raw" map output length.
static int SUCCESS
           
static String TEMP_DIR_NAME
          Temporary directory name
static int TRACKERS_OK
           
static int UNKNOWN_TASKTRACKER
           
static long versionID
          version 3 introduced to replace emitHeartbeat/pollForNewTask/pollForTaskWithClosedJob with heartbeat(TaskTrackerStatus, boolean, boolean, short). Version 4 changed TaskReport for HADOOP-549.
static long versionID
           
static String WORKDIR
           
 
Method Summary
 Vector<org.apache.hadoop.mapred.JobInProgress> completedJobs()
           
 Vector<org.apache.hadoop.mapred.JobInProgress> failedJobs()
           
static InetSocketAddress getAddress(Configuration conf)
           
 JobStatus[] getAllJobs()
          Get all the jobs submitted.
 String getAssignedTracker(String taskid)
          Deprecated. 
 String getAssignedTracker(TaskAttemptID taskId)
          Get tracker name for a given task id.
 String getBuildVersion()
          Returns the buildVersion of the JobTracker
 ClusterStatus getClusterStatus()
          Get the current status of the cluster
 String getFilesystemName()
          Grab the local fs name
 int getInfoPort()
           
 org.apache.hadoop.mapred.JobInProgress getJob(JobID jobid)
           
 org.apache.hadoop.mapred.JobInProgress getJob(String jobid)
          Deprecated. 
 Counters getJobCounters(JobID jobid)
          Grab the current job counters
 Counters getJobCounters(String id)
          Deprecated. 
 JobProfile getJobProfile(JobID jobid)
          Grab a handle to a job that is already known to the JobTracker.
 JobProfile getJobProfile(String id)
          Deprecated. 
 JobStatus getJobStatus(JobID jobid)
          Grab a handle to a job that is already known to the JobTracker.
 JobStatus getJobStatus(String id)
          Deprecated. 
 String getJobTrackerMachine()
           
static String getLocalJobFilePath(JobID jobId)
          Get the localized job file path on the job tracker's local file system
 String getLocalJobFilePath(String jobid)
          Deprecated. 
 TaskReport[] getMapTaskReports(JobID jobid)
          Grab a bunch of info on the map tasks that make up the job
 TaskReport[] getMapTaskReports(String jobid)
          Deprecated. 
 JobID getNewJobId()
          Allocates a new JobId string.
 Node getNode(String name)
          Return the Node in the network topology that corresponds to the hostname
 Collection<Node> getNodesAtMaxLevel()
          Returns a collection of nodes at the max level
 int getNumResolvedTaskTrackers()
           
 int getNumTaskCacheLevels()
           
static Node getParentNode(Node node, int level)
           
 long getProtocolVersion(String protocol, long clientVersion)
          Return protocol version corresponding to protocol interface.
 TaskReport[] getReduceTaskReports(JobID jobid)
          Grab a bunch of info on the reduce tasks that make up the job
 TaskReport[] getReduceTaskReports(String jobid)
          Deprecated. 
 List<org.apache.hadoop.mapred.JobInProgress> getRunningJobs()
          Version that is called from a timer thread, and therefore needs to be careful to synchronize.
 long getStartTime()
           
 String getSystemDir()
          Grab the jobtracker system directory path where job-specific files are to be placed.
 TaskCompletionEvent[] getTaskCompletionEvents(JobID jobid, int fromEventId, int maxEvents)
          Get task completion events for the jobid, starting from fromEventId.
 TaskCompletionEvent[] getTaskCompletionEvents(String jobid, int fromid, int maxevents)
          Deprecated. 
 String[] getTaskDiagnostics(String jobid, String tipid, String taskid)
          Deprecated. 
 String[] getTaskDiagnostics(TaskAttemptID taskId)
          Get the diagnostics for a given task
 org.apache.hadoop.mapred.TaskTrackerStatus getTaskTracker(String trackerID)
           
 org.apache.hadoop.mapred.TaskInProgress getTip(TaskID tipid)
          Returns specified TaskInProgress, or null.
 int getTotalSubmissions()
           
 String getTrackerIdentifier()
          Get the unique identifier (i.e. timestamp) of this job tracker start.
 int getTrackerPort()
           
 org.apache.hadoop.mapred.HeartbeatResponse heartbeat(org.apache.hadoop.mapred.TaskTrackerStatus status, boolean initialContact, boolean acceptNewTasks, short responseId)
          The periodic heartbeat mechanism between the TaskTracker and the JobTracker.
 JobStatus[] jobsToComplete()
          Get the jobs that are not completed and not failed
 void killJob(JobID jobid)
          Kill the indicated job
 void killJob(String id)
          Deprecated. 
 boolean killTask(String taskId, boolean shouldFail)
          Deprecated. 
 boolean killTask(TaskAttemptID taskid, boolean shouldFail)
          Mark a Task to be killed
static void main(String[] argv)
          Start the JobTracker process.
 void offerService()
          Run forever
 void reportTaskTrackerError(String taskTracker, String errorClass, String errorMessage)
          Report a problem to the job tracker.
 Node resolveAndAddToTopology(String name)
           
 Vector<org.apache.hadoop.mapred.JobInProgress> runningJobs()
           
static JobTracker startTracker(JobConf conf)
          Start the JobTracker with given configuration.
 void stopTracker()
           
 JobStatus submitJob(JobID jobId)
          JobTracker.submitJob() kicks off a new job.
 JobStatus submitJob(String jobid)
          Deprecated. 
 Collection taskTrackers()
           
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

LOG

public static final org.apache.commons.logging.Log LOG

HEARTBEAT_INTERVAL_MIN

public static final int HEARTBEAT_INTERVAL_MIN
See Also:
Constant Field Values

CLUSTER_INCREMENT

public static final int CLUSTER_INCREMENT
See Also:
Constant Field Values

COUNTER_UPDATE_INTERVAL

public static final long COUNTER_UPDATE_INTERVAL
See Also:
Constant Field Values

MAX_INMEM_FILESYS_USE

public static final float MAX_INMEM_FILESYS_USE
Constant denoting when a merge of in memory files will be triggered

See Also:
Constant Field Values

MAX_INMEM_FILESIZE_FRACTION

public static final float MAX_INMEM_FILESIZE_FRACTION
Constant denoting the max size (in terms of the fraction of the total size of the filesys) of a map output file that we will try to keep in mem. Ideally, this should be a factor of MAX_INMEM_FILESYS_USE

See Also:
Constant Field Values

SUCCESS

public static final int SUCCESS
See Also:
Constant Field Values

FILE_NOT_FOUND

public static final int FILE_NOT_FOUND
See Also:
Constant Field Values

MAP_OUTPUT_LENGTH

public static final String MAP_OUTPUT_LENGTH
The custom http header used for the map output length.

See Also:
Constant Field Values

RAW_MAP_OUTPUT_LENGTH

public static final String RAW_MAP_OUTPUT_LENGTH
The custom http header used for the "raw" map output length.

See Also:
Constant Field Values

TEMP_DIR_NAME

public static final String TEMP_DIR_NAME
Temporary directory name

See Also:
Constant Field Values

WORKDIR

public static final String WORKDIR
See Also:
Constant Field Values

versionID

public static final long versionID
Version 3 introduced to replace emitHeartbeat/pollForNewTask/pollForTaskWithClosedJob with heartbeat(TaskTrackerStatus, boolean, boolean, short). Version 4 changed TaskReport for HADOOP-549. Version 5 removes locateMapOutputs and instead uses getTaskCompletionEvents to figure out finished maps and fetch the outputs. Version 6 adds maxTasks to TaskTrackerStatus for HADOOP-1245. Version 7 replaces maxTasks by maxMapTasks and maxReduceTasks in TaskTrackerStatus for HADOOP-1274. Version 8: HeartbeatResponse is added with the next heartbeat interval. Version 9 changes the counter representation for HADOOP-2248. Version 10 changes the TaskStatus representation for HADOOP-2208. Version 11 changes string to JobID in getTaskCompletionEvents(). Version 12 changes the counters representation for HADOOP-1915. Version 13 added call getBuildVersion() for HADOOP-236. Version 14: replaced getFilesystemName with getSystemDir for HADOOP-3135.

See Also:
Constant Field Values

TRACKERS_OK

public static final int TRACKERS_OK
See Also:
Constant Field Values

UNKNOWN_TASKTRACKER

public static final int UNKNOWN_TASKTRACKER
See Also:
Constant Field Values

versionID

public static final long versionID
See Also:
Constant Field Values
Method Detail

startTracker

public static JobTracker startTracker(JobConf conf)
                               throws IOException,
                                      InterruptedException
Start the JobTracker with given configuration. The conf will be modified to reflect the actual ports on which the JobTracker is up and running if the user passes the port as zero.

Parameters:
conf - configuration for the JobTracker.
Throws:
IOException
InterruptedException

stopTracker

public void stopTracker()
                 throws IOException
Throws:
IOException

getProtocolVersion

public long getProtocolVersion(String protocol,
                               long clientVersion)
                        throws IOException
Description copied from interface: VersionedProtocol
Return protocol version corresponding to protocol interface.

Parameters:
protocol - The classname of the protocol interface
clientVersion - The version of the protocol that the client speaks
Returns:
the version that the server will speak
Throws:
IOException

getAddress

public static InetSocketAddress getAddress(Configuration conf)

offerService

public void offerService()
                  throws InterruptedException
Run forever

Throws:
InterruptedException

getTotalSubmissions

public int getTotalSubmissions()

getJobTrackerMachine

public String getJobTrackerMachine()

getTrackerIdentifier

public String getTrackerIdentifier()
Get the unique identifier (i.e. timestamp) of this job tracker start.

Returns:
a string with a unique identifier

getTrackerPort

public int getTrackerPort()

getInfoPort

public int getInfoPort()

getStartTime

public long getStartTime()

runningJobs

public Vector<org.apache.hadoop.mapred.JobInProgress> runningJobs()

getRunningJobs

public List<org.apache.hadoop.mapred.JobInProgress> getRunningJobs()
Version that is called from a timer thread, and therefore needs to be careful to synchronize.


failedJobs

public Vector<org.apache.hadoop.mapred.JobInProgress> failedJobs()

completedJobs

public Vector<org.apache.hadoop.mapred.JobInProgress> completedJobs()

taskTrackers

public Collection taskTrackers()

getTaskTracker

public org.apache.hadoop.mapred.TaskTrackerStatus getTaskTracker(String trackerID)

resolveAndAddToTopology

public Node resolveAndAddToTopology(String name)

getNodesAtMaxLevel

public Collection<Node> getNodesAtMaxLevel()
Returns a collection of nodes at the max level


getParentNode

public static Node getParentNode(Node node,
                                 int level)

getNode

public Node getNode(String name)
Return the Node in the network topology that corresponds to the hostname


getNumTaskCacheLevels

public int getNumTaskCacheLevels()

getNumResolvedTaskTrackers

public int getNumResolvedTaskTrackers()

getBuildVersion

public String getBuildVersion()
                       throws IOException
Returns the buildVersion of the JobTracker

Throws:
IOException

heartbeat

public org.apache.hadoop.mapred.HeartbeatResponse heartbeat(org.apache.hadoop.mapred.TaskTrackerStatus status,
                                                            boolean initialContact,
                                                            boolean acceptNewTasks,
                                                            short responseId)
                                                     throws IOException
The periodic heartbeat mechanism between the TaskTracker and the JobTracker. The JobTracker processes the status information sent by the TaskTracker and responds with instructions to start/stop tasks or jobs, and also 'reset' instructions during contingencies.

Parameters:
status - the status update
initialContact - true if this is first interaction since 'refresh', false otherwise.
acceptNewTasks - true if the TaskTracker is ready to accept new tasks to run.
responseId - the last responseId successfully acted upon by the TaskTracker.
Returns:
a HeartbeatResponse with fresh instructions.
Throws:
IOException

getFilesystemName

public String getFilesystemName()
                         throws IOException
Grab the local fs name

Throws:
IOException

reportTaskTrackerError

public void reportTaskTrackerError(String taskTracker,
                                   String errorClass,
                                   String errorMessage)
                            throws IOException
Report a problem to the job tracker.

Parameters:
taskTracker - the name of the task tracker
errorClass - the kind of error (e.g. the class that was thrown)
errorMessage - the human readable error message
Throws:
IOException - if there was a problem in communication or on the remote side

getNewJobId

public JobID getNewJobId()
                  throws IOException
Allocates a new JobId string.

Returns:
a unique job name for submitting jobs.
Throws:
IOException

submitJob

@Deprecated
public JobStatus submitJob(String jobid)
                    throws IOException
Deprecated. 

Throws:
IOException

submitJob

public JobStatus submitJob(JobID jobId)
                    throws IOException
JobTracker.submitJob() kicks off a new job. Create a 'JobInProgress' object, which contains both JobProfile and JobStatus. Those two sub-objects are sometimes shipped outside of the JobTracker. But JobInProgress adds info that's useful for the JobTracker alone. We add the JIP to the jobInitQueue, which is processed asynchronously to handle split-computation and build up the right TaskTracker/Block mapping.

Throws:
IOException

getClusterStatus

public ClusterStatus getClusterStatus()
Get the current status of the cluster

Returns:
summary of the state of the cluster

killJob

@Deprecated
public void killJob(String id)
Deprecated. 


killJob

public void killJob(JobID jobid)
Kill the indicated job


getJobProfile

@Deprecated
public JobProfile getJobProfile(String id)
Deprecated. 


getJobProfile

public JobProfile getJobProfile(JobID jobid)
Grab a handle to a job that is already known to the JobTracker.

Returns:
Profile of the job, or null if not found.

getJobStatus

@Deprecated
public JobStatus getJobStatus(String id)
Deprecated. 


getJobStatus

public JobStatus getJobStatus(JobID jobid)
Grab a handle to a job that is already known to the JobTracker.

Returns:
Status of the job, or null if not found.

getJobCounters

@Deprecated
public Counters getJobCounters(String id)
Deprecated. 


getJobCounters

public Counters getJobCounters(JobID jobid)
Grab the current job counters


getMapTaskReports

@Deprecated
public TaskReport[] getMapTaskReports(String jobid)
Deprecated. 


getMapTaskReports

public TaskReport[] getMapTaskReports(JobID jobid)
Grab a bunch of info on the map tasks that make up the job


getReduceTaskReports

@Deprecated
public TaskReport[] getReduceTaskReports(String jobid)
Deprecated. 


getReduceTaskReports

public TaskReport[] getReduceTaskReports(JobID jobid)
Grab a bunch of info on the reduce tasks that make up the job


getTaskCompletionEvents

@Deprecated
public TaskCompletionEvent[] getTaskCompletionEvents(String jobid,
                                                                int fromid,
                                                                int maxevents)
                                              throws IOException
Deprecated. 

Throws:
IOException

getTaskCompletionEvents

public TaskCompletionEvent[] getTaskCompletionEvents(JobID jobid,
                                                     int fromEventId,
                                                     int maxEvents)
                                              throws IOException
Get task completion events for the jobid, starting from fromEventId. Returns empty array if no events are available.

Parameters:
jobid - job id
fromEventId - event id to start from.
maxEvents - the max number of events we want to look at
Returns:
array of task completion events.
Throws:
IOException

getTaskDiagnostics

@Deprecated
public String[] getTaskDiagnostics(String jobid,
                                              String tipid,
                                              String taskid)
                            throws IOException
Deprecated. 

Throws:
IOException

getTaskDiagnostics

public String[] getTaskDiagnostics(TaskAttemptID taskId)
                            throws IOException
Get the diagnostics for a given task

Parameters:
taskId - the id of the task
Returns:
an array of the diagnostic messages
Throws:
IOException

getTip

public org.apache.hadoop.mapred.TaskInProgress getTip(TaskID tipid)
Returns specified TaskInProgress, or null.


killTask

@Deprecated
public boolean killTask(String taskId,
                                   boolean shouldFail)
                 throws IOException
Deprecated. 

Throws:
IOException

killTask

public boolean killTask(TaskAttemptID taskid,
                        boolean shouldFail)
                 throws IOException
Mark a Task to be killed

Parameters:
taskid - the id of the task to kill.
shouldFail - if true the task is failed and added to failed tasks list, otherwise it is just killed, without affecting job failure status.
Throws:
IOException

getAssignedTracker

@Deprecated
public String getAssignedTracker(String taskid)
Deprecated. 


getAssignedTracker

public String getAssignedTracker(TaskAttemptID taskId)
Get tracker name for a given task id.

Parameters:
taskId - the name of the task
Returns:
The name of the task tracker

jobsToComplete

public JobStatus[] jobsToComplete()
Get the jobs that are not completed and not failed

Returns:
array of JobStatus for the running/to-be-run jobs.

getAllJobs

public JobStatus[] getAllJobs()
Get all the jobs submitted.

Returns:
array of JobStatus for the submitted jobs

getSystemDir

public String getSystemDir()
Grab the jobtracker system directory path where job-specific files are to be placed.

Returns:
the system directory where job-specific files are to be placed.
See Also:
JobSubmissionProtocol.getSystemDir()

getJob

@Deprecated
public org.apache.hadoop.mapred.JobInProgress getJob(String jobid)
Deprecated. 


getJob

public org.apache.hadoop.mapred.JobInProgress getJob(JobID jobid)

getLocalJobFilePath

@Deprecated
public String getLocalJobFilePath(String jobid)
Deprecated. 


getLocalJobFilePath

public static String getLocalJobFilePath(JobID jobId)
Get the localized job file path on the job tracker's local file system

Parameters:
jobId - id of the job
Returns:
the path of the job conf file on the local file system

main

public static void main(String[] argv)
                 throws IOException,
                        InterruptedException
Start the JobTracker process. This is used only for debugging. As a rule, JobTracker should be run as part of the DFS Namenode process.

Throws:
IOException
InterruptedException


Copyright © 2008 The Apache Software Foundation