private void unBlackListTracker(String hostName, ReasonForBlackListing rfb) { // check if you can black list the tracker then call this methods FaultInfo fi = getFaultInfo(hostName, false); if(fi.removeBlackListedReason(rfb)) { if(fi.getReasonforblacklisting().isEmpty()) { addHostCapacity(hostName); LOG.info("Unblacklisting tracker : " + hostName); fi.unBlacklist(); //We have unBlackListed tracker, so tracker should //definitely be healthy. Check fault count if fault count //is zero don't keep it memory. if(fi.getFaultCount() == 0) { potentiallyFaultyTrackers.remove(hostName); } } } }
// Copy the strings held as the value of e into an array and record them as
// a JobFault for job jobName against tracker tt.
// NOTE(review): e, tt and jobName are declared outside this excerpt;
// confirm their types against the enclosing method.
String[] exceptions = e.getValue().toArray(new String[0]);
faultyTrackers.incrementFaults(tt, new JobFault(tt, jobName, exceptions));
/** * Get the active and blacklisted task tracker names in the cluster. The first * element in the returned list contains the list of active tracker names. * The second element in the returned list contains the list of blacklisted * tracker names. */ // This method is synchronized to make sure that the locking order // "taskTrackers lock followed by faultyTrackers.potentiallyFaultyTrackers // lock" is under JobTracker lock to avoid deadlocks. synchronized public List<List<String>> taskTrackerNames() { List<String> activeTrackers = new ArrayList<String>(); List<String> blacklistedTrackers = new ArrayList<String>(); synchronized (taskTrackers) { for (TaskTracker tt : taskTrackers.values()) { TaskTrackerStatus status = tt.getStatus(); if (!faultyTrackers.isBlacklisted(status.getHost())) { activeTrackers.add(status.getTrackerName()); } else { blacklistedTrackers.add(status.getTrackerName()); } } } List<List<String>> result = new ArrayList<List<String>>(2); result.add(activeTrackers); result.add(blacklistedTrackers); return result; }
long now = getClock().getTime(); if (restarted) { faultyTrackers.markTrackerHealthy(status.getHost()); } else { faultyTrackers.shouldAssignTasksToTracker(status.getHost(), now); !faultyTrackers.isBlacklisted(status.getHost());
boolean isBlacklisted = false; if (restarted) { faultyTrackers.markTrackerHealthy(status.getHost()); } else { isBlacklisted = faultyTrackers.shouldAssignTasksToTracker(status.getHost(), now); isBlacklisted = faultyTrackers.isBlacklisted(status.getHost());
if (!faultyTrackers.isBlacklisted(status.getHost())) { if (!faultyTrackers.isBlacklisted(oldStatus.getHost())) {
getInstrumentation().decOccupiedMapSlots(oldStatus.countOccupiedMapSlots()); getInstrumentation().decOccupiedReduceSlots(oldStatus.countOccupiedReduceSlots()); if (!faultyTrackers.isBlacklisted(oldStatus.getHost())) { int mapSlots = oldStatus.getMaxMapSlots(); totalMapTaskCapacity -= mapSlots; getInstrumentation().addOccupiedMapSlots(status.countOccupiedMapSlots()); getInstrumentation().addOccupiedReduceSlots(status.countOccupiedReduceSlots()); if (!faultyTrackers.isBlacklisted(status.getHost())) { int mapSlots = status.getMaxMapSlots(); totalMapTaskCapacity += mapSlots;
fi.setFaultCount(numFaults); fi.setLastUpdated(now); if (canUnBlackListTracker(hostName, ReasonForBlackListing.EXCEEDING_FAILURES)) { unBlackListTracker(hostName, ReasonForBlackListing.EXCEEDING_FAILURES);
if (job.getNoOfBlackListedTrackers() > 0) { for (String hostName : job.getBlackListedTrackers()) { faultyTrackers.incrementFaults(hostName);
/**
 * Blacklists a tracker host for the given reason, or records an additional
 * reason when the host is already blacklisted.
 *
 * For a host that is not yet blacklisted, every tracker registered for the
 * host has its reservations cancelled, the host's capacity is removed and
 * the fault info (created if absent) is marked as blacklisted.
 *
 * @param hostName host to blacklist
 * @param reason human-readable explanation stored alongside {@code rfb}
 * @param rfb the category of the blacklisting
 */
private void blackListTracker(String hostName, String reason,
                              ReasonForBlackListing rfb) {
  FaultInfo fi = getFaultInfo(hostName, true);

  if (fi.isBlacklisted()) {
    if (LOG.isDebugEnabled()) {
      LOG.debug("Adding blacklisted reason for tracker : " + hostName
          + " Reason for blacklisting is : " + rfb);
    }
    // Only report at info level when this reason is new for the host.
    if (!fi.getReasonforblacklisting().contains(rfb)) {
      LOG.info("Adding blacklisted reason for tracker : " + hostName
          + " Reason for blacklisting is : " + rfb);
    }
    fi.addBlackListedReason(rfb, reason);
    return;
  }

  LOG.info("Blacklisting tracker : " + hostName
      + " Reason for blacklisting is : " + rfb);

  // The lookup yields null when no tracker set is registered for this
  // host; synchronizing on null would throw NullPointerException and
  // leave the host un-blacklisted, so only cancel reservations when there
  // are trackers to cancel them on.
  Set<TaskTracker> trackers = hostnameToTaskTracker.get(hostName);
  if (trackers != null) {
    synchronized (trackers) {
      for (TaskTracker tracker : trackers) {
        tracker.cancelAllReservations();
      }
    }
  }

  removeHostCapacity(hostName);
  fi.setBlacklist(rfb, reason);
}
/**
 * Adds a blacklisting reason for a tracker host, blacklisting the host if
 * it is not blacklisted yet.
 *
 * When the host is newly blacklisted, reservations on all trackers
 * registered for the host are cancelled, the host's capacity is removed
 * and its fault info (created if absent) is flagged as blacklisted.
 *
 * @param hostName host to blacklist
 * @param reason human-readable explanation stored alongside {@code rfb}
 * @param rfb the category of the blacklisting
 */
private void blackListTracker(String hostName, String reason,
                              ReasonForBlackListing rfb) {
  FaultInfo fi = getFaultInfo(hostName, true);
  boolean alreadyBlacklisted = fi.isBlacklisted();

  if (alreadyBlacklisted) {
    if (LOG.isDebugEnabled()) {
      LOG.debug("Adding blacklisted reason for tracker : " + hostName
          + " Reason for blacklisting is : " + rfb);
    }
    // Info-level logging is reserved for reasons not yet recorded.
    if (!fi.getReasonforblacklisting().contains(rfb)) {
      LOG.info("Adding blacklisted reason for tracker : " + hostName
          + " Reason for blacklisting is : " + rfb);
    }
    fi.addBlackListedReason(rfb, reason);
  } else {
    LOG.info("Blacklisting tracker : " + hostName
        + " Reason for blacklisting is : " + rfb);

    Set<TaskTracker> trackers = hostnameToTaskTracker.get(hostName);
    // Guard against a host with no registered tracker set: a null monitor
    // in the synchronized statement would raise NullPointerException
    // before the host is actually blacklisted.
    if (trackers != null) {
      synchronized (trackers) {
        for (TaskTracker tracker : trackers) {
          tracker.cancelAllReservations();
        }
      }
    }

    removeHostCapacity(hostName);
    fi.setBlacklist(rfb, reason);
  }
}
/** * Get the active and blacklisted task tracker names in the cluster. The first * element in the returned list contains the list of active tracker names. * The second element in the returned list contains the list of blacklisted * tracker names. */ // This method is synchronized to make sure that the locking order // "taskTrackers lock followed by faultyTrackers.potentiallyFaultyTrackers // lock" is under JobTracker lock to avoid deadlocks. synchronized public List<List<String>> taskTrackerNames() { List<String> activeTrackers = new ArrayList<String>(); List<String> blacklistedTrackers = new ArrayList<String>(); synchronized (taskTrackers) { for (TaskTracker tt : taskTrackers.values()) { TaskTrackerStatus status = tt.getStatus(); if (!faultyTrackers.isBlacklisted(status.getHost())) { activeTrackers.add(status.getTrackerName()); } else { blacklistedTrackers.add(status.getTrackerName()); } } } List<List<String>> result = new ArrayList<List<String>>(2); result.add(activeTrackers); result.add(blacklistedTrackers); return result; }
/**
 * Reports whether the given tracker host is currently blacklisted, after
 * first forgiving one fault if the host has gone long enough without an
 * update.
 *
 * If the host has a fault info entry whose last update is more than
 * {@code UpdateFaultyTrackerInterval} before {@code now}, one fault is
 * forgiven, the last-updated time is reset to {@code now}, and the host is
 * un-blacklisted for {@code EXCEEDING_FAILURES} when that is permitted.
 * Assumes JobTracker is locked on the entry.
 *
 * Note that, despite the method name, the return value is {@code true}
 * when the tracker IS blacklisted.
 *
 * @param hostName the tracker host name
 * @param now the current time
 * @return {@code true} if the tracker is blacklisted, {@code false}
 *         otherwise (including when no fault info exists for the host)
 */
boolean shouldAssignTasksToTracker(String hostName, long now) {
  synchronized (potentiallyFaultyTrackers) {
    FaultInfo faultInfo = potentiallyFaultyTrackers.get(hostName);
    if (faultInfo == null) {
      // A host without fault info has never been blacklisted.
      return false;
    }

    if ((now - faultInfo.getLastUpdated()) > UpdateFaultyTrackerInterval) {
      faultInfo.forgiveOneFault();
      faultInfo.setLastUpdated(now);
      if (canUnBlackListTracker(hostName,
          ReasonForBlackListing.EXCEEDING_FAILURES)) {
        unBlackListTracker(hostName,
            ReasonForBlackListing.EXCEEDING_FAILURES);
      }
    }

    return faultInfo.isBlacklisted();
  }
}
/**
 * Bumps the fault count (blacklisting by a job) of a tracker host by one.
 *
 * The host's fault info is created if it does not exist yet, which adds
 * the host to the potentially faulty list. If the updated fault info
 * exceeds the allowed faults, the host is blacklisted with reason
 * {@code EXCEEDING_FAILURES}.
 * Assumes JobTracker is locked on the entry.
 *
 * @param hostName host whose fault count is incremented
 */
void incrementFaults(String hostName) {
  synchronized (potentiallyFaultyTrackers) {
    FaultInfo faultInfo = getFaultInfo(hostName, true);
    long timestamp = clock.getTime();
    int updatedCount = faultInfo.getFaultCount() + 1;

    faultInfo.setFaultCount(updatedCount);
    faultInfo.setLastUpdated(timestamp);

    if (!exceedsFaults(faultInfo)) {
      return;
    }

    LOG.info("Adding " + hostName + " to the blacklist"
        + " across all jobs");
    String reason =
        String.format(FaultInfo.FAULT_FORMAT_STRING, updatedCount);
    blackListTracker(hostName, reason,
        ReasonForBlackListing.EXCEEDING_FAILURES);
  }
}
/**
 * Records a job fault (blacklisting by a job) against a tracker host.
 *
 * The host's fault info is created if it does not exist yet, which adds
 * the host to the potentially faulty list. If the updated fault info
 * exceeds the allowed faults, the host is blacklisted with reason
 * {@code EXCEEDING_FAILURES}.
 * Assumes JobTracker is locked on the entry.
 *
 * @param hostName host the fault is recorded against
 * @param jf the fault to add to the host's fault info
 */
void incrementFaults(String hostName, JobFault jf) {
  synchronized (potentiallyFaultyTrackers) {
    FaultInfo faultInfo = getFaultInfo(hostName, true);
    faultInfo.addFault(jf);
    faultInfo.setLastUpdated(getClock().getTime());

    if (!exceedsFaults(faultInfo)) {
      return;
    }

    LOG.info("Adding " + hostName + " to the blacklist"
        + " across all jobs");
    // The reason text carries the fault count as it stands after the add.
    String reason = String.format(FaultInfo.FAULT_FORMAT_STRING,
        faultInfo.getFaultCount());
    blackListTracker(hostName, reason,
        ReasonForBlackListing.EXCEEDING_FAILURES);
  }
}
void setNodeHealthStatus(String hostName, boolean isHealthy, String reason) { FaultInfo fi = null; // If tracker is not healthy, create a fault info object // blacklist it. if (!isHealthy) { fi = getFaultInfo(hostName, true); fi.setHealthy(isHealthy); synchronized (potentiallyFaultyTrackers) { blackListTracker(hostName, reason, ReasonForBlackListing.NODE_UNHEALTHY); } } else { fi = getFaultInfo(hostName, false); if (fi == null) { return; } else { if (canUnBlackListTracker(hostName, ReasonForBlackListing.NODE_UNHEALTHY)) { unBlackListTracker(hostName, ReasonForBlackListing.NODE_UNHEALTHY); } } } } }
void setNodeHealthStatus(String hostName, boolean isHealthy, String reason) { FaultInfo fi = null; // If tracker is not healthy, create a fault info object // blacklist it. if (!isHealthy) { fi = getFaultInfo(hostName, true); fi.setHealthy(isHealthy); updateNodeHealthFailureStatistics(hostName, fi); synchronized (potentiallyFaultyTrackers) { blackListTracker(hostName, reason, ReasonForBlackListing.NODE_UNHEALTHY); } } else { fi = getFaultInfo(hostName, false); if (fi == null) { return; } else { if (canUnBlackListTracker(hostName, ReasonForBlackListing.NODE_UNHEALTHY)) { unBlackListTracker(hostName, ReasonForBlackListing.NODE_UNHEALTHY); } } } }
/** * Get the blacklisted task tracker statuses in the cluster * * @return {@link Collection} of blacklisted {@link TaskTrackerStatus} */ // This method is synchronized to make sure that the locking order // "taskTrackers lock followed by faultyTrackers.potentiallyFaultyTrackers // lock" is under JobTracker lock to avoid deadlocks. synchronized public Collection<TaskTrackerStatus> blacklistedTaskTrackers() { Collection<TaskTrackerStatus> blacklistedTrackers = new ArrayList<TaskTrackerStatus>(); synchronized (taskTrackers) { for (TaskTracker tt : taskTrackers.values()) { TaskTrackerStatus status = tt.getStatus(); if (faultyTrackers.isBlacklisted(status.getHost())) { blacklistedTrackers.add(status); } } } return blacklistedTrackers; }
/** * Get the active task tracker statuses in the cluster * * @return {@link Collection} of active {@link TaskTrackerStatus} */ // This method is synchronized to make sure that the locking order // "taskTrackers lock followed by faultyTrackers.potentiallyFaultyTrackers // lock" is under JobTracker lock to avoid deadlocks. synchronized public Collection<TaskTrackerStatus> activeTaskTrackers() { Collection<TaskTrackerStatus> activeTrackers = new ArrayList<TaskTrackerStatus>(); synchronized (taskTrackers) { for ( TaskTracker tt : taskTrackers.values()) { TaskTrackerStatus status = tt.getStatus(); if (!faultyTrackers.isBlacklisted(status.getHost())) { activeTrackers.add(status); } } } return activeTrackers; }
/** * Get the active task tracker statuses in the cluster * * @return {@link Collection} of active {@link TaskTrackerStatus} */ // This method is synchronized to make sure that the locking order // "taskTrackers lock followed by faultyTrackers.potentiallyFaultyTrackers // lock" is under JobTracker lock to avoid deadlocks. synchronized public Collection<TaskTrackerStatus> activeTaskTrackers() { Collection<TaskTrackerStatus> activeTrackers = new ArrayList<TaskTrackerStatus>(); synchronized (taskTrackers) { for ( TaskTracker tt : taskTrackers.values()) { TaskTrackerStatus status = tt.getStatus(); if (!faultyTrackers.isBlacklisted(status.getHost())) { activeTrackers.add(status); } } } return activeTrackers; }