if ((vertexType == VertexType.MULTI_INPUT_INITIALIZED_EDGES)
    || (vertexType == VertexType.INITIALIZED_EDGES)) {
  hiveEdgeManagerDesc = EdgeManagerPluginDescriptor.create(CustomPartitionEdge.class.getName());
  UserPayload payload = getBytePayload(bucketToTaskMap);
  hiveEdgeManagerDesc.setUserPayload(payload);
}
// ... (elided: drain side inputs buffered before the main input initialized)
for (Entry<String, Multimap<Integer, InputSplit>> entry : inputToGroupedSplitMap.entrySet()) {
  processAllSideEvents(entry.getKey(), entry.getValue());
}
setVertexParallelismAndRootInputSpec(inputNameInputSpecMap);
inputToGroupedSplitMap.clear();
// ... (elided)
setVertexParallelismAndRootInputSpec(inputNameInputSpecMap);
sendBucketIdsToProcessor();
private void processAllSideEventsSetParallelism(String inputName,
    Multimap<Integer, InputSplit> bucketToGroupedSplitMap) throws IOException {
  // the bucket to task map should have been set up by the big table.
  LOG.info("Processing events for input " + inputName);
  if (inputNameInputSpecMap.get(mainWorkName) == null) {
    LOG.info("We don't have a routing table yet. Will need to wait for the main input "
        + mainWorkName + " initialization");
    inputToGroupedSplitMap.put(inputName, bucketToGroupedSplitMap);
    return;
  }
  processAllSideEvents(inputName, bucketToGroupedSplitMap);
  setVertexParallelismAndRootInputSpec(inputNameInputSpecMap);
}
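// Illustrative sketch (not part of the original class): the shape of the
// Guava Multimap that processAllSideEventsSetParallelism receives. Each
// bucket id maps to the grouped splits for that bucket. The class wrapper,
// paths, and split sizes below are hypothetical; real entries come from the
// grouped InputDataInformationEvents.
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputSplit;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.Multimap;

public class BucketMultimapSketch {
  public static void main(String[] args) {
    Multimap<Integer, InputSplit> bucketToGroupedSplitMap = HashMultimap.create();
    // bucket 0 -> two splits, bucket 1 -> one split
    bucketToGroupedSplitMap.put(0, new FileSplit(new Path("/warehouse/t/000000_0"), 0L, 128L, new String[0]));
    bucketToGroupedSplitMap.put(0, new FileSplit(new Path("/warehouse/t/000000_1"), 0L, 256L, new String[0]));
    bucketToGroupedSplitMap.put(1, new FileSplit(new Path("/warehouse/t/000001_0"), 0L, 64L, new String[0]));
    System.out.println("bucket 0 -> " + bucketToGroupedSplitMap.get(0).size() + " splits");
  }
}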
FileSplit fileSplit;
try {
  fileSplit = getFileSplitFromEvent(diEvent);
} catch (IOException e) {
  throw new RuntimeException("Failed to get file split for event: " + diEvent, e);
}
// ... (elided)
getBucketSplitMapForPath(inputName, pathFileSplitsMap);
// ... (elided: if this is the main input) {
  processAllEvents(inputName, bucketToGroupedSplitMap, secondLevelGroupingDone);
} else {
  SplitLocationProvider splitLocationProvider = Utils.getSplitLocationProvider(conf, LOG);
  // ... (elided)
  processAllSideEventsSetParallelism(inputName, bucketToGroupedSplitMap);
}
if ((vertexType == VertexType.MULTI_INPUT_INITIALIZED_EDGES)
    || (vertexType == VertexType.INITIALIZED_EDGES)) {
  hiveEdgeManagerDesc = EdgeManagerPluginDescriptor.create(CustomPartitionEdge.class.getName());
  UserPayload payload = getBytePayload(bucketToTaskMap);
  hiveEdgeManagerDesc.setUserPayload(payload);
}
// ... (elided)
// Drain side inputs that arrived before the main input finished initializing.
if (inputToGroupedSplitMap.isEmpty() == false) {
  for (Entry<String, Multimap<Integer, InputSplit>> entry : inputToGroupedSplitMap.entrySet()) {
    processAllSideEvents(entry.getKey(), entry.getValue());
  }
  setVertexParallelismAndRootInputSpec(inputNameInputSpecMap);
  inputToGroupedSplitMap.clear();
}
// ... (elided)
setVertexParallelismAndRootInputSpec(inputNameInputSpecMap);
@Test(timeout = 5000)
public void testGetBytePayload() throws IOException {
  int numBuckets = 10;
  VertexManagerPluginContext context = mock(VertexManagerPluginContext.class);
  CustomVertexConfiguration vertexConf =
      new CustomVertexConfiguration(numBuckets, TezWork.VertexType.INITIALIZED_EDGES);
  DataOutputBuffer dob = new DataOutputBuffer();
  vertexConf.write(dob);
  UserPayload payload = UserPayload.create(ByteBuffer.wrap(dob.getData()));
  when(context.getUserPayload()).thenReturn(payload);
  CustomPartitionVertex vm = new CustomPartitionVertex(context);
  vm.initialize();

  // prepare empty routing table
  Multimap<Integer, Integer> routingTable = HashMultimap.<Integer, Integer> create();
  payload = vm.getBytePayload(routingTable);

  // get conf from user payload
  CustomEdgeConfiguration edgeConf = new CustomEdgeConfiguration();
  DataInputByteBuffer dibb = new DataInputByteBuffer();
  dibb.reset(payload.getPayload());
  edgeConf.readFields(dibb);
  assertEquals(numBuckets, edgeConf.getNumBuckets());
}
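// Note on the test above: it exercises both halves of the payload protocol.
// It first serializes a CustomVertexConfiguration into the UserPayload that
// initialize() consumes, then decodes the CustomEdgeConfiguration that
// getBytePayload() produces for the CustomPartitionEdge edge manager, and
// checks that the bucket count survives the round trip.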
@Override
public void initialize() {
  this.context = getContext();
  ByteBuffer payload = context.getUserPayload().getPayload();
  CustomVertexConfiguration vertexConf = new CustomVertexConfiguration();
  DataInputByteBuffer dibb = new DataInputByteBuffer();
  dibb.reset(payload);
  try {
    vertexConf.readFields(dibb);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
  this.numBuckets = vertexConf.getNumBuckets();
  this.mainWorkName = vertexConf.getInputName();
  this.vertexType = vertexConf.getVertexType();
  this.numInputsAffectingRootInputSpecUpdate = vertexConf.getNumInputs();
}
FileSplit fileSplit;
try {
  fileSplit = getFileSplitFromEvent(diEvent);
} catch (IOException e) {
  throw new RuntimeException("Failed to get file split for event: " + diEvent, e);
}
// ... (elided)
getBucketSplitMapForPath(pathFileSplitsMap);
// ... (elided: if this is the main input) {
  processAllEvents(inputName, bucketToGroupedSplitMap, secondLevelGroupingDone);
} else {
  SplitLocationProvider splitLocationProvider = Utils.getSplitLocationProvider(conf, LOG);
  // ... (elided)
  processAllSideEventsSetParallelism(inputName, bucketToGroupedSplitMap);
}
@Override
public void initialize() {
  this.context = getContext();
  ByteBuffer payload = context.getUserPayload().getPayload();
  CustomVertexConfiguration vertexConf = new CustomVertexConfiguration();
  DataInputByteBuffer dibb = new DataInputByteBuffer();
  dibb.reset(payload);
  try {
    vertexConf.readFields(dibb);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
  this.numBuckets = vertexConf.getNumBuckets();
  this.mainWorkName = vertexConf.getInputName();
  this.vertexType = vertexConf.getVertexType();
  this.numInputsAffectingRootInputSpecUpdate = vertexConf.getNumInputs();
  this.inputToBucketMap = vertexConf.getInputToBucketMap();
}
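// Minimal sketch (an assumed helper, not in the original file) of the
// producer side of initialize(): a CustomVertexConfiguration is written out
// with the Hadoop Writable pattern and wrapped into the vertex manager's
// UserPayload, exactly as testGetBytePayload does above. The class name,
// method name, and constructor arguments are illustrative; the sketch assumes
// it lives in org.apache.hadoop.hive.ql.exec.tez so that
// CustomVertexConfiguration is visible.
import java.nio.ByteBuffer;
import org.apache.hadoop.hive.ql.plan.TezWork;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.tez.dag.api.UserPayload;

public class VertexPayloadSketch {
  static UserPayload buildPayload(int numBuckets) throws java.io.IOException {
    CustomVertexConfiguration vertexConf =
        new CustomVertexConfiguration(numBuckets, TezWork.VertexType.INITIALIZED_EDGES);
    DataOutputBuffer dob = new DataOutputBuffer();
    vertexConf.write(dob);
    // wrap the serialized bytes; the vertex manager reads them back in initialize()
    return UserPayload.create(ByteBuffer.wrap(dob.getData()));
  }
}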
FileSplit fileSplit;
try {
  fileSplit = getFileSplitFromEvent(diEvent);
} catch (IOException e) {
  throw new RuntimeException("Failed to get file split for event: " + diEvent, e);
}
// ... (elided)
getBucketSplitMapForPath(pathFileSplitsMap);
// ... (elided: if this is the main input) {
  processAllEvents(inputName, bucketToGroupedSplitMap, secondLevelGroupingDone);
} else {
  processAllSideEventsSetParallelism(inputName, bucketToGroupedSplitMap);
}
private void processAllSideEventsSetParallelism(String inputName,
    Multimap<Integer, InputSplit> bucketToGroupedSplitMap) throws IOException {
  // the bucket to task map should have been set up by the big table.
  LOG.info("Processing events for input " + inputName);
  if (bucketToTaskMap.isEmpty()) {
    LOG.info("We don't have a routing table yet. Will need to wait for the main input"
        + " initialization");
    inputToGroupedSplitMap.put(inputName, bucketToGroupedSplitMap);
    return;
  }
  processAllSideEvents(inputName, bucketToGroupedSplitMap);
  setVertexParallelismAndRootInputSpec(inputNameInputSpecMap);
}