/**
 * Always reports end-of-input. Pings the framework via notifyProgress() first
 * so the task is not flagged as stalled, then signals that no record is
 * available.
 *
 * @return false — this reader never produces a record
 */
@Override
public boolean next() throws IOException {
  getContext().notifyProgress();
  return false;
}
/**
 * Consumes a split delivered via an InputDataInformationEvent: initializes the
 * RecordReader from the event under rrLock, then signals rrInited so any
 * thread blocked waiting for reader initialization can proceed.
 *
 * @param diEvent the event carrying the split information
 * @throws IOException if reader initialization from the event fails
 */
void processSplitEvent(InputDataInformationEvent diEvent) throws IOException {
  rrLock.lock();
  try {
    initFromEventInternal(diEvent);
    if (LOG.isDebugEnabled()) {
      String vertexName = getContext().getSourceVertexName();
      LOG.debug(vertexName + " notifying on RecordReader initialized");
    }
    // Wake waiters; signal() is safe here because rrLock is held.
    rrInited.signal();
  } finally {
    rrLock.unlock();
  }
}
/**
 * Blocks until the RecordReader has been initialized from a split event
 * (signalled by processSplitEvent on rrInited).
 *
 * NOTE(review): the entry assertion expects the caller to already hold rrLock
 * exactly once, and the lock() below re-enters it — TODO confirm against callers.
 *
 * @throws IOException if the wait is interrupted before initialization completes
 */
void checkAndAwaitRecordReaderInitialization() throws IOException {
  assert rrLock.getHoldCount() == 1;
  rrLock.lock();
  try {
    if (LOG.isDebugEnabled()) {
      LOG.debug(getContext().getSourceVertexName()
          + " awaiting RecordReader initialization");
    }
    rrInited.await();
  } catch (InterruptedException e) {
    // Fixes vs. original: catch the narrow type Condition.await() declares
    // (was a blanket Exception), restore the interrupt flag, preserve the
    // cause (was dropped), and correct the "initiailization" typo.
    Thread.currentThread().interrupt();
    throw new IOException("Interrupted waiting for RecordReader initialization", e);
  } finally {
    rrLock.unlock();
  }
}
/**
 * Initializes this input: runs superclass initialization, marks the input as
 * ready, reads the splits-via-events flag from the job configuration, logs the
 * effective settings, and performs internal initialization.
 *
 * @return always null — no events are emitted from initialize
 * @throws IOException if internal initialization fails
 */
@Override
public List<Event> initialize() throws IOException {
  super.initialize();
  getContext().inputIsReady();
  this.splitInfoViaEvents =
      jobConf.getBoolean(MRJobConfig.MR_TEZ_SPLITS_VIA_EVENTS,
          MRJobConfig.MR_TEZ_SPLITS_VIA_EVENTS_DEFAULT);
  // Assemble the summary once, then log it.
  String summary = getContext().getSourceVertexName()
      + " using newmapreduce API=" + useNewApi
      + ", split via event=" + splitInfoViaEvents
      + ", numPhysicalInputs=" + getNumPhysicalInputs();
  LOG.info(summary);
  initializeInternal();
  return null;
}
/**
 * Closes the underlying MR reader (if one was ever created) and reports the
 * number of input records processed to the statistics reporter.
 *
 * @return always null — no events are emitted on close
 * @throws IOException if closing the underlying reader fails
 */
@Override
public List<Event> close() throws IOException {
  // Guard against close() being invoked before a reader was ever created
  // (e.g. initialization failed or the split event never arrived); the
  // unconditional mrReader.close() would NPE in that case.
  if (mrReader != null) {
    mrReader.close();
  }
  long inputRecords = getContext().getCounters()
      .findCounter(TaskCounter.INPUT_RECORDS_PROCESSED).getValue();
  getContext().getStatisticsReporter().reportItemsProcessed(inputRecords);
  return null;
}
// NOTE(review): the two statements below appear to be a corrupted or
// incomplete merge of initFromEventInternal(...) and initializeInternal(...)
// from the upstream source: braces are unbalanced, several
// LOG.isDebugEnabled() guards are opened but never closed, the new-API
// (mapreduce) and old-API (mapred) branches are interleaved, and a `try` is
// opened without a matching catch/finally. This region will not compile as
// written. Do NOT edit in place — restore it from the original upstream file
// before making any behavioral change. TODO confirm against upstream.
private void initFromEventInternal(InputDataInformationEvent initEvent) throws IOException { if (LOG.isDebugEnabled()) { LOG.debug(getContext().getSourceVertexName() + " initializing RecordReader from event"); LOG.debug(getContext().getSourceVertexName() + " split Details -> SplitClass: " + split.getClass().getName() + ", NewSplit: " + split + ", length: " + splitLength); splitLength = split.getLength(); if (LOG.isDebugEnabled()) { LOG.debug(getContext().getSourceVertexName() + " split Details -> SplitClass: " + split.getClass().getName() + ", OldSplit: " + split + ", length: " + splitLength); getContext().getCounters().findCounter(TaskCounter.INPUT_SPLIT_LENGTH_BYTES) .increment(splitLength); LOG.info(getContext().getSourceVertexName() + " initialized RecordReader from event");
mrReader = new MRReaderMapReduce(jobConf, getContext().getCounters(), inputRecordCounter, getContext().getApplicationId().getClusterTimestamp(), getContext() .getTaskVertexIndex(), getContext().getApplicationId().getId(), getContext() .getTaskIndex(), getContext().getTaskAttemptNumber(), getContext()); } else { mrReader = new MRReaderMapred(jobConf, getContext().getCounters(), inputRecordCounter, getContext()); TaskSplitMetaInfo thisTaskMetaInfo = allMetaInfo[getContext().getTaskIndex()]; TaskSplitIndex splitMetaInfo = new TaskSplitIndex(thisTaskMetaInfo.getSplitLocation(), thisTaskMetaInfo.getStartOffset()); if (useNewApi) { org.apache.hadoop.mapreduce.InputSplit newInputSplit = MRInputUtils .getNewSplitDetailsFromDisk(splitMetaInfo, jobConf, getContext().getCounters() .findCounter(TaskCounter.SPLIT_RAW_BYTES)); try { mrReader = new MRReaderMapReduce(jobConf, newInputSplit, getContext().getCounters(), inputRecordCounter, getContext().getApplicationId().getClusterTimestamp(), getContext().getTaskVertexIndex(), getContext().getApplicationId().getId(), getContext().getTaskIndex(), getContext().getTaskAttemptNumber(), getContext()); } else { org.apache.hadoop.mapred.InputSplit oldInputSplit = MRInputUtils .getOldSplitDetailsFromDisk(splitMetaInfo, jobConf, getContext().getCounters() .findCounter(TaskCounter.SPLIT_RAW_BYTES)); splitLength = oldInputSplit.getLength(); mrReader = new MRReaderMapred(jobConf, oldInputSplit, getContext().getCounters(),
/**
 * Extracts the Hadoop {@link Configuration} packed into the given input's user
 * payload. A MergedLogicalInput is first unwrapped to its first child input;
 * MRInput payloads are decoded via the MRInput payload parser, and any other
 * AbstractLogicalInput payload is decoded directly.
 *
 * @param input the logical input whose payload is inspected
 * @return the configuration decoded from the input's payload
 * @throws FlowException if the payload cannot be unpacked
 * @throws IllegalStateException if the input type is not recognized
 */
public static Configuration getInputConfiguration(LogicalInput input) {
  try {
    if (input instanceof MergedLogicalInput) {
      input = (LogicalInput) Util.getFirst(((MergedLogicalInput) input).getInputs());
    }
    if (input instanceof MRInput) {
      MRInput mrInput = (MRInput) input;
      return createConfFromByteString(
          parseMRInputPayload(mrInput.getContext().getUserPayload()).getConfigurationBytes());
    }
    if (input instanceof AbstractLogicalInput) {
      AbstractLogicalInput logicalInput = (AbstractLogicalInput) input;
      return createConfFromUserPayload(logicalInput.getContext().getUserPayload());
    }
  } catch (IOException exception) {
    throw new FlowException("unable to unpack payload", exception);
  }
  throw new IllegalStateException("unknown input type: " + input.getClass().getName());
}
// NOTE(review): this method is a byte-for-byte duplicate of the
// getInputConfiguration method on the preceding line. Two identical members
// in one class would not compile, so this is most likely a concatenation
// artifact (the same utility captured from two files or two revisions).
// Confirm provenance and deduplicate at the source rather than editing here.
public static Configuration getInputConfiguration( LogicalInput input ) { try { if( input instanceof MergedLogicalInput ) input = (LogicalInput) Util.getFirst( ( (MergedLogicalInput) input ).getInputs() ); if( input instanceof MRInput ) return createConfFromByteString( parseMRInputPayload( ( (MRInput) input ).getContext().getUserPayload() ).getConfigurationBytes() ); if( input instanceof AbstractLogicalInput ) return createConfFromUserPayload( ( (AbstractLogicalInput) input ).getContext().getUserPayload() ); } catch( IOException exception ) { throw new FlowException( "unable to unpack payload", exception ); } throw new IllegalStateException( "unknown input type: " + input.getClass().getName() ); }