private DatanodeDescriptor chooseRandomNode(Set<String> excludedUUids,
    boolean preferLiveNodes) {
  Random r = new Random();
  for (int i = dnR.size() - 1; i >= 0; --i) {
    int pos = r.nextInt(i + 1);
    DatanodeDescriptor node = dnR.get(pos);
    String uuid = node.getDatanodeUuid();
    if (!excludedUUids.contains(uuid)) {
      if (!preferLiveNodes || node.getAdminState() == AdminStates.NORMAL) {
        return node;
      }
    }
    Collections.swap(dnR, i, pos);
  }
  return null;
}
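The loop above is a partial Fisher-Yates shuffle: every rejected candidate is swapped behind the sweep boundary, so each node is drawn at most once and the scan finishes in a single pass even when no node qualifies. A minimal standalone sketch of the same pattern, assuming nothing from HDFS (RandomPick, pickRandom, and accept are illustrative names, not HDFS API):

import java.util.Collections;
import java.util.List;
import java.util.Random;
import java.util.function.Predicate;

final class RandomPick {
  /** Randomly pick an element satisfying {@code accept}, or null if none. */
  static <T> T pickRandom(List<T> candidates, Predicate<T> accept) {
    Random r = new Random();
    for (int i = candidates.size() - 1; i >= 0; --i) {
      int pos = r.nextInt(i + 1);  // uniform draw from the unswept prefix [0, i]
      T item = candidates.get(pos);
      if (accept.test(item)) {
        return item;               // first acceptable draw wins
      }
      // Move the rejected item past the sweep boundary so it is never
      // drawn again; this is what bounds the loop to one pass.
      Collections.swap(candidates, i, pos);
    }
    return null;                   // no candidate passed the filter
  }
}

With a filter like n -> n.getAdminState() == AdminStates.NORMAL this mirrors the preferLiveNodes behavior above, except that chooseRandomNode additionally honors the excluded-UUID set.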
/**
 * Returns whether a node can be safely decommissioned or put into
 * maintenance based on its liveness. Dead nodes cannot always be safely
 * decommissioned or put into maintenance.
 */
boolean isNodeHealthyForDecommissionOrMaintenance(DatanodeDescriptor node) {
  if (!node.checkBlockReportReceived()) {
    LOG.info("Node {} hasn't sent its first block report.", node);
    return false;
  }

  if (node.isAlive()) {
    return true;
  }

  updateState();
  if (pendingReconstructionBlocksCount == 0 &&
      lowRedundancyBlocksCount == 0) {
    LOG.info("Node {} is dead and there are no low redundancy" +
        " blocks or blocks pending reconstruction. Safe to decommission or" +
        " put in maintenance.", node);
    return true;
  }

  LOG.warn("Node {} is dead " +
      "while in {}. Cannot be safely " +
      "decommissioned or be in maintenance since there is risk of reduced " +
      "data durability or data loss. Either restart the failed node or " +
      "force decommissioning or maintenance by removing, calling " +
      "refreshNodes, then re-adding to the excludes or host config files.",
      node, node.getAdminState());
  return false;
}
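For context, a simplified sketch of how the DatanodeAdminManager monitor consumes this check; this is a paraphrase using the monitor's surrounding names (blocks, dn, setDecommissioned, setInMaintenance), not the verbatim loop:

final boolean isHealthy =
    blockManager.isNodeHealthyForDecommissionOrMaintenance(dn);
if (blocks.size() == 0 && isHealthy) {
  if (dn.isDecommissionInProgress()) {
    setDecommissioned(dn);   // all tracked blocks replicated, node healthy
  } else if (dn.isEnteringMaintenance()) {
    setInMaintenance(dn);    // stays tracked until the maintenance expiry
  }
}
// A dead node that still has low-redundancy or pending-reconstruction
// blocks fails the check and remains in DECOMMISSION_INPROGRESS or
// ENTERING_MAINTENANCE until it returns or is force-removed.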
/**
 * Stop decommissioning the specified datanode.
 * @param node
 */
@VisibleForTesting
public void stopDecommission(DatanodeDescriptor node) {
  if (node.isDecommissionInProgress() || node.isDecommissioned()) {
    // Update DN stats maintained by HeartbeatManager.
    hbManager.stopDecommission(node);
    // Extra redundancy blocks will be detected and processed when
    // the dead node comes back and sends in its full block report.
    if (node.isAlive()) {
      blockManager.processExtraRedundancyBlocksOnInService(node);
    }
    // Remove from tracking in DatanodeAdminManager.
    pendingNodes.remove(node);
    outOfServiceNodeBlocks.remove(node);
  } else {
    LOG.trace("stopDecommission: Node {} in {}, nothing to do.",
        node, node.getAdminState());
  }
}
/**
 * The returned information is a JSON representation of a map whose keys
 * are host names and whose values map dead-node attribute names to their
 * values.
 */
@Override // NameNodeMXBean
public String getDeadNodes() {
  final Map<String, Map<String, Object>> info =
      new HashMap<String, Map<String, Object>>();
  final List<DatanodeDescriptor> dead = new ArrayList<DatanodeDescriptor>();
  blockManager.getDatanodeManager().fetchDatanodes(null, dead, false);
  for (DatanodeDescriptor node : dead) {
    Map<String, Object> innerinfo = ImmutableMap.<String, Object>builder()
        .put("lastContact", getLastContact(node))
        .put("decommissioned", node.isDecommissioned())
        .put("adminState", node.getAdminState().toString())
        .put("xferaddr", node.getXferAddr())
        .build();
    info.put(node.getHostName() + ":" + node.getXferPort(), innerinfo);
  }
  return JSON.toString(info);
}
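An illustrative return value for a cluster with one dead datanode; the host name, ports, and attribute values below are invented for the example:

{"dn3.example.com:9866":
  {"lastContact": 912,
   "decommissioned": true,
   "adminState": "DECOMMISSIONED",
   "xferaddr": "10.0.0.13:9866"}}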
} else {
  LOG.trace("stopMaintenance: Node {} in {}, nothing to do.",
      node, node.getAdminState());
}
/**
 * Start decommissioning the specified datanode.
 * @param node
 */
@VisibleForTesting
public void startDecommission(DatanodeDescriptor node) {
  if (!node.isDecommissionInProgress() && !node.isDecommissioned()) {
    // Update DN stats maintained by HeartbeatManager.
    hbManager.startDecommission(node);
    // hbManager.startDecommission will set a dead node to DECOMMISSIONED.
    if (node.isDecommissionInProgress()) {
      for (DatanodeStorageInfo storage : node.getStorageInfos()) {
        LOG.info("Starting decommission of {} {} with {} blocks",
            node, storage, storage.numBlocks());
      }
      node.getLeavingServiceStatus().setStartTime(monotonicNow());
      pendingNodes.add(node);
    }
  } else {
    LOG.trace("startDecommission: Node {} in {}, nothing to do.",
        node, node.getAdminState());
  }
}
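A hedged sketch of the usual driver for startDecommission/stopDecommission: a refreshNodes pass walks the registered datanodes and applies the transition implied by the exclude file. dnAdmin, datanodeManager.getDatanodes, and hostConfig are assumed names for illustration, not verbatim HDFS identifiers:

for (DatanodeDescriptor node : datanodeManager.getDatanodes()) {
  if (hostConfig.isExcluded(node)) {
    dnAdmin.startDecommission(node);  // NORMAL -> DECOMMISSION_INPROGRESS
  } else {
    dnAdmin.stopDecommission(node);   // recommission: back to NORMAL
  }
}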
LOG.debug("Processing {} node {}", dn.getAdminState(), dn); pruneReliableBlocks(dn, blocks); + "marked as {}.", dn, dn.getAdminState()); } else { LOG.debug("Node {} {} healthy." + " It needs to replicate {} more blocks." + " {} is still in progress.", dn, isHealthy ? "is": "isn't", blocks.size(), dn.getAdminState()); dn, blocks.size(), dn.getAdminState());
/**
 * Start maintenance of the specified datanode.
 * @param node
 */
@VisibleForTesting
public void startMaintenance(DatanodeDescriptor node,
    long maintenanceExpireTimeInMS) {
  // Even if the node is already in maintenance, we still need to adjust
  // the expiration time.
  node.setMaintenanceExpireTimeInMS(maintenanceExpireTimeInMS);
  if (!node.isMaintenance()) {
    // Update DN stats maintained by HeartbeatManager.
    hbManager.startMaintenance(node);
    // hbManager.startMaintenance will set a dead node to IN_MAINTENANCE.
    if (node.isEnteringMaintenance()) {
      for (DatanodeStorageInfo storage : node.getStorageInfos()) {
        LOG.info("Starting maintenance of {} {} with {} blocks",
            node, storage, storage.numBlocks());
      }
      node.getLeavingServiceStatus().setStartTime(monotonicNow());
    }
    // Track the node regardless of whether it is ENTERING_MAINTENANCE or
    // IN_MAINTENANCE, to support maintenance expiration.
    pendingNodes.add(node);
  } else {
    LOG.trace("startMaintenance: Node {} in {}, nothing to do.",
        node, node.getAdminState());
  }
}
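A hedged usage sketch, assuming the expiry argument is an absolute timestamp in milliseconds and that dnAdmin is a handle to this manager (both assumptions, not verbatim HDFS code):

import java.util.concurrent.TimeUnit;

// Put the node into maintenance for roughly four hours. Re-invoking
// startMaintenance with a different expiry only adjusts the window, per
// the expiration-time handling above; an already-IN_MAINTENANCE node is
// not re-tracked.
long expireAtMs = System.currentTimeMillis() + TimeUnit.HOURS.toMillis(4);
dnAdmin.startMaintenance(node, expireAtMs);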
.put("lastContact", getLastContact(node)) .put("usedSpace", getDfsUsed(node)) .put("adminState", node.getAdminState().toString()) .put("nonDfsUsedSpace", node.getNonDfsUsed()) .put("capacity", node.getCapacity())
/**
 * Stop decommissioning the specified datanode.
 * @param node
 */
@VisibleForTesting
public void stopDecommission(DatanodeDescriptor node) {
  if (node.isDecommissionInProgress() || node.isDecommissioned()) {
    // Update DN stats maintained by HeartbeatManager.
    hbManager.stopDecommission(node);
    // Over-replicated blocks will be detected and processed when
    // the dead node comes back and sends in its full block report.
    if (node.isAlive) {
      blockManager.processOverReplicatedBlocksOnReCommission(node);
    }
    // Remove from tracking in DecommissionManager.
    pendingNodes.remove(node);
    decomNodeBlocks.remove(node);
  } else {
    LOG.trace("stopDecommission: Node {} in {}, nothing to do.",
        node, node.getAdminState());
  }
}
/**
 * Start decommissioning the specified datanode.
 * @param node
 */
@VisibleForTesting
public void startDecommission(DatanodeDescriptor node) {
  if (!node.isDecommissionInProgress() && !node.isDecommissioned()) {
    // Update DN stats maintained by HeartbeatManager.
    hbManager.startDecommission(node);
    // hbManager.startDecommission will set a dead node to DECOMMISSIONED.
    if (node.isDecommissionInProgress()) {
      for (DatanodeStorageInfo storage : node.getStorageInfos()) {
        LOG.info("Starting decommission of {} {} with {} blocks",
            node, storage, storage.numBlocks());
      }
      node.decommissioningStatus.setStartTime(monotonicNow());
      pendingNodes.add(node);
    }
  } else {
    LOG.trace("startDecommission: Node {} in {}, nothing to do.",
        node, node.getAdminState());
  }
}