private String getWorkUnitName() { StringBuilder sb = new StringBuilder(); sb.append("["); sb.append(StringUtils.stripToEmpty(this.workUnitState.getProp(ConfigurationKeys.SOURCE_QUERYBASED_SCHEMA))); sb.append("_"); sb.append(StringUtils.stripToEmpty(this.workUnitState.getProp(ConfigurationKeys.SOURCE_ENTITY))); sb.append("_"); String id = this.workUnitState.getId(); int seqIndex = id.lastIndexOf("_", id.length()); if (seqIndex > 0) { String timeSeqStr = id.substring(0, seqIndex); int timeIndex = timeSeqStr.lastIndexOf("_", timeSeqStr.length()); if (timeIndex > 0) { sb.append(id.substring(timeIndex + 1)); } } sb.append("]"); return sb.toString(); }
@Override public Extractor<S, D> getExtractor(WorkUnitState workUnitState) throws IOException { if (!workUnitState.contains(HadoopFileInputSource.FILE_SPLIT_BYTES_STRING_KEY)) { throw new IOException("No serialized FileSplit found in WorkUnitState " + workUnitState.getId()); } JobConf jobConf = new JobConf(new Configuration()); for (String key : workUnitState.getPropertyNames()) { jobConf.set(key, workUnitState.getProp(key)); } String fileSplitBytesStr = workUnitState.getProp(HadoopFileInputSource.FILE_SPLIT_BYTES_STRING_KEY); FileSplit fileSplit = (FileSplit) HadoopUtils.deserializeFromString(FileSplit.class, fileSplitBytesStr); FileInputFormat<K, V> fileInputFormat = getFileInputFormat(workUnitState, jobConf); RecordReader<K, V> recordReader = fileInputFormat.getRecordReader(fileSplit, jobConf, Reporter.NULL); boolean readKeys = workUnitState.getPropAsBoolean( HadoopFileInputSource.FILE_INPUT_READ_KEYS_KEY, HadoopFileInputSource.DEFAULT_FILE_INPUT_READ_KEYS); return getExtractor(workUnitState, recordReader, fileSplit, readKeys); }
LOG.warn(String.format("Branch %d of WorkUnit %s produced no data", branchId, state.getId())); return;
@Override public Extractor<S, D> getExtractor(WorkUnitState workUnitState) throws IOException { if (!workUnitState.contains(FILE_SPLIT_BYTES_STRING_KEY)) { throw new IOException("No serialized FileSplit found in WorkUnitState " + workUnitState.getId()); } Configuration configuration = new Configuration(); FileInputFormat<K, V> fileInputFormat = getFileInputFormat(workUnitState, configuration); String fileSplitBytesStr = workUnitState.getProp(FILE_SPLIT_BYTES_STRING_KEY); FileSplit fileSplit = (FileSplit) HadoopUtils.deserializeFromString(FileSplit.class, fileSplitBytesStr); TaskAttemptContext taskAttemptContext = getTaskAttemptContext(configuration, DummyTaskAttemptIDFactory.newTaskAttemptID()); try { RecordReader<K, V> recordReader = fileInputFormat.createRecordReader(fileSplit, taskAttemptContext); recordReader.initialize(fileSplit, taskAttemptContext); boolean readKeys = workUnitState.getPropAsBoolean(FILE_INPUT_READ_KEYS_KEY, DEFAULT_FILE_INPUT_READ_KEYS); return getExtractor(workUnitState, recordReader, fileSplit, readKeys); } catch (InterruptedException ie) { throw new IOException(ie); } }
private String getWorkUnitName() { StringBuilder sb = new StringBuilder(); sb.append("["); sb.append(StringUtils.stripToEmpty(this.workUnitState.getProp(ConfigurationKeys.SOURCE_QUERYBASED_SCHEMA))); sb.append("_"); sb.append(StringUtils.stripToEmpty(this.workUnitState.getProp(ConfigurationKeys.SOURCE_ENTITY))); sb.append("_"); String id = this.workUnitState.getId(); int seqIndex = id.lastIndexOf("_", id.length()); if (seqIndex > 0) { String timeSeqStr = id.substring(0, seqIndex); int timeIndex = timeSeqStr.lastIndexOf("_", timeSeqStr.length()); if (timeIndex > 0) { sb.append(id.substring(timeIndex + 1)); } } sb.append("]"); return sb.toString(); }
@Override public Extractor<S, D> getExtractor(WorkUnitState workUnitState) throws IOException { if (!workUnitState.contains(HadoopFileInputSource.FILE_SPLIT_BYTES_STRING_KEY)) { throw new IOException("No serialized FileSplit found in WorkUnitState " + workUnitState.getId()); } JobConf jobConf = new JobConf(new Configuration()); for (String key : workUnitState.getPropertyNames()) { jobConf.set(key, workUnitState.getProp(key)); } String fileSplitBytesStr = workUnitState.getProp(HadoopFileInputSource.FILE_SPLIT_BYTES_STRING_KEY); FileSplit fileSplit = (FileSplit) HadoopUtils.deserializeFromString(FileSplit.class, fileSplitBytesStr); FileInputFormat<K, V> fileInputFormat = getFileInputFormat(workUnitState, jobConf); RecordReader<K, V> recordReader = fileInputFormat.getRecordReader(fileSplit, jobConf, Reporter.NULL); boolean readKeys = workUnitState.getPropAsBoolean( HadoopFileInputSource.FILE_INPUT_READ_KEYS_KEY, HadoopFileInputSource.DEFAULT_FILE_INPUT_READ_KEYS); return getExtractor(workUnitState, recordReader, fileSplit, readKeys); }
LOG.warn(String.format("Branch %d of WorkUnit %s produced no data", branchId, state.getId())); return;
@Override public Extractor<S, D> getExtractor(WorkUnitState workUnitState) throws IOException { if (!workUnitState.contains(FILE_SPLIT_BYTES_STRING_KEY)) { throw new IOException("No serialized FileSplit found in WorkUnitState " + workUnitState.getId()); } Configuration configuration = new Configuration(); FileInputFormat<K, V> fileInputFormat = getFileInputFormat(workUnitState, configuration); String fileSplitBytesStr = workUnitState.getProp(FILE_SPLIT_BYTES_STRING_KEY); FileSplit fileSplit = (FileSplit) HadoopUtils.deserializeFromString(FileSplit.class, fileSplitBytesStr); TaskAttemptContext taskAttemptContext = getTaskAttemptContext(configuration, DummyTaskAttemptIDFactory.newTaskAttemptID()); try { RecordReader<K, V> recordReader = fileInputFormat.createRecordReader(fileSplit, taskAttemptContext); recordReader.initialize(fileSplit, taskAttemptContext); boolean readKeys = workUnitState.getPropAsBoolean(FILE_INPUT_READ_KEYS_KEY, DEFAULT_FILE_INPUT_READ_KEYS); return getExtractor(workUnitState, recordReader, fileSplit, readKeys); } catch (InterruptedException ie) { throw new IOException(ie); } }