/**
 * Advances to the next record whose key is accepted by the configured filter.
 * Records whose keys are rejected are skipped silently.
 *
 * @return true if a matching record was positioned; false once input is exhausted.
 * @throws IOException on a read failure in the underlying reader.
 * @throws InterruptedException if reading is interrupted.
 */
public synchronized boolean nextKeyValue() throws IOException, InterruptedException {
  boolean matched = false;
  while (!matched && super.nextKeyValue()) {
    key = super.getCurrentKey();
    if (filter.accept(key)) {
      value = super.getCurrentValue();
      matched = true;
    }
  }
  return matched;
}
/**
 * Creates the wrapped sequence-file reader that supplies the raw key/value pairs.
 *
 * @throws IOException declared for interface compatibility; construction itself
 *     performs no I/O visible here.
 */
public SequenceFileAsTextRecordReader() throws IOException {
  this.sequenceFileRecordReader =
      new SequenceFileRecordReader<WritableComparable<?>, Writable>();
}
/**
 * Forwards split and task-context initialization to the wrapped sequence-file reader.
 *
 * @param split the input split to read.
 * @param context the task attempt context.
 * @throws IOException if the delegate fails to open the split.
 * @throws InterruptedException if initialization is interrupted.
 */
public void initialize(InputSplit split, TaskAttemptContext context)
    throws IOException, InterruptedException {
  this.sequenceFileRecordReader.initialize(split, context);
}
/**
 * Reads every key/value record from the given sequence file and returns them as a
 * stream of copied KV pairs.
 *
 * <p>Key and value are deep-copied because Hadoop RecordReaders reuse the same
 * Writable instances for every record.
 *
 * @param fileName path of the sequence file to read.
 * @return a stream of (Text, LongWritable) pairs, one per record.
 * @throws RuntimeException wrapping any read failure, with the file name in the message.
 */
private Stream<KV<Text, LongWritable>> extractResultsFromFile(String fileName) {
  try (SequenceFileRecordReader<Text, LongWritable> reader = new SequenceFileRecordReader<>()) {
    Path path = new Path(fileName);
    TaskAttemptContext taskContext =
        HadoopFormats.createTaskAttemptContext(new Configuration(), new JobID("readJob", 0), 0);
    reader.initialize(
        new FileSplit(path, 0L, Long.MAX_VALUE, new String[] {"localhost"}), taskContext);
    List<KV<Text, LongWritable>> result = new ArrayList<>();
    while (reader.nextKeyValue()) {
      // Copy the reused Writables so each KV is independent of later reads.
      result.add(
          KV.of(
              new Text(reader.getCurrentKey().toString()),
              new LongWritable(reader.getCurrentValue().get())));
    }
    return result.stream();
  } catch (InterruptedException e) {
    // Preserve the interrupt status for callers further up the stack.
    Thread.currentThread().interrupt();
    throw new RuntimeException("Interrupted while reading " + fileName, e);
  } catch (Exception e) {
    // Include the file name so failures are diagnosable from the message alone.
    throw new RuntimeException("Failed to extract results from " + fileName, e);
  }
}
/**
 * Opens the chunk file for reading: creates a fresh sequence-file reader and
 * initializes it over the whole file as a single split.
 *
 * @param taskAttemptContext context used to initialize the reader.
 * @throws IOException if the file size cannot be determined or the reader fails to open.
 * @throws InterruptedException if initialization is interrupted.
 */
private void openForRead(TaskAttemptContext taskAttemptContext)
    throws IOException, InterruptedException {
  final long length = getFileSize(chunkFilePath, configuration);
  final FileSplit wholeFile = new FileSplit(chunkFilePath, 0, length, null);
  reader = new SequenceFileRecordReader<>();
  reader.initialize(wholeFile, taskAttemptContext);
}
/**
 * Returns the next Tuple from the underlying reader, or null when input is
 * exhausted, the current value is null, or the value is not a Tuple.
 *
 * @return the next Tuple, or null if no further tuple is available.
 * @throws IOException if reading fails or the read is interrupted.
 */
@Override
public Tuple getNext() throws IOException {
  boolean hasNext;
  try {
    hasNext = reader.nextKeyValue();
  } catch (InterruptedException e) {
    // Restore the interrupt status before translating to the declared IOException.
    Thread.currentThread().interrupt();
    throw new IOException(e);
  }
  if (!hasNext) {
    return null;
  }
  Object value = reader.getCurrentValue();
  // null fails instanceof too, so this also covers the original null check.
  if (!(value instanceof Tuple)) {
    return null;
  }
  return (Tuple) value;
}
/**
 * Implementation of RecordReader::getCurrentValue().
 *
 * @return The value of the current record. (i.e. the target-path.)
 * @throws IOException on a failure in the delegate reader.
 * @throws InterruptedException if the delegate read is interrupted.
 */
@Override
public V getCurrentValue() throws IOException, InterruptedException {
  // Delegate straight to the reader of the chunk currently being processed.
  return chunk.getReader().getCurrentValue();
}
if (chunk.getReader().nextKeyValue()) { ++numRecordsProcessedByThisMap; return true; if (chunk.getReader().nextKeyValue()) { ++numRecordsProcessedByThisMap; return true;
/**
 * Implementation of RecordReader::getCurrentKey().
 *
 * @return The key of the current record. (i.e. the source-path.)
 * @throws IOException on a failure in the delegate reader.
 * @throws InterruptedException if the delegate read is interrupted.
 */
@Override
public K getCurrentKey() throws IOException, InterruptedException {
  // Delegate straight to the reader of the chunk currently being processed.
  return chunk.getReader().getCurrentKey();
}
/** Closes the wrapped sequence-file reader, releasing its resources. */
public synchronized void close() throws IOException {
  this.sequenceFileRecordReader.close();
}
}
/**
 * Reports how much of the underlying sequence file has been consumed.
 *
 * @return the delegate reader's progress fraction.
 * @throws IOException on a failure in the delegate reader.
 * @throws InterruptedException if the delegate is interrupted.
 */
public float getProgress() throws IOException, InterruptedException {
  return this.sequenceFileRecordReader.getProgress();
}
/**
 * Builds a function that, given an output part name, reads every record of that
 * part file under {@code outputDir} and returns them as a stream of pairs.
 *
 * <p>Key and value are copied for each record because Hadoop RecordReaders reuse
 * the same Writable instances across calls to nextKeyValue(); storing the raw
 * references would leave every Pair aliasing the last record read.
 *
 * @param outputDir directory containing the part files.
 * @param conf configuration (currently unused; a fresh Configuration is created).
 * @return a function mapping a part name to a stream of (Text, LongWritable) pairs.
 */
@Override
public Function<String, Stream<Pair<Text, LongWritable>>> extractOutputFunction(
    String outputDir, Configuration conf) {
  return part ->
      ExceptionUtils.unchecked(
          () -> {
            try (final SequenceFileRecordReader<Text, LongWritable> reader =
                new SequenceFileRecordReader<>()) {
              final Path path = new Path(outputDir + "/" + part);
              final TaskAttemptContext taskContext =
                  HadoopUtils.createTaskContext(new Configuration(), HadoopUtils.getJobID(), 0);
              reader.initialize(
                  new FileSplit(path, 0L, Long.MAX_VALUE, new String[] {"localhost"}),
                  taskContext);
              final List<Pair<Text, LongWritable>> result = new ArrayList<>();
              while (reader.nextKeyValue()) {
                // Defensive copies: the reader mutates and returns the same
                // Writable objects on every iteration.
                result.add(
                    Pair.of(
                        new Text(reader.getCurrentKey()),
                        new LongWritable(reader.getCurrentValue().get())));
              }
              return result.stream();
            }
          });
}
/**
 * Prepares the chunk for reading by initializing a new sequence-file reader
 * over a split covering the entire chunk file.
 *
 * @param taskAttemptContext context passed to the reader's initialize().
 * @throws IOException if the chunk file cannot be sized or opened.
 * @throws InterruptedException if initialization is interrupted.
 */
private void openForRead(TaskAttemptContext taskAttemptContext)
    throws IOException, InterruptedException {
  reader = new SequenceFileRecordReader<>();
  final long chunkLength = getFileSize(chunkFilePath, configuration);
  reader.initialize(new FileSplit(chunkFilePath, 0, chunkLength, null), taskAttemptContext);
}
/**
 * Buffers up to {@code batchSize} JSON objects parsed from the string form of the
 * underlying reader's values, replacing any previously buffered batch.
 *
 * @return true if at least one record was buffered; false once input is exhausted.
 * @throws IOException on a read failure in the underlying reader.
 * @throws InterruptedException if reading is interrupted.
 */
public synchronized boolean nextKeyValue() throws IOException, InterruptedException {
  data.clear();
  int buffered = 0;
  boolean more = sequenceFileRecordReader.nextKeyValue();
  while (more) {
    data.add(new JSONObject(sequenceFileRecordReader.getCurrentValue().toString()));
    buffered++;
    // Batch-size check happens after the add, exactly as before.
    if (buffered == batchSize) {
      break;
    }
    more = sequenceFileRecordReader.nextKeyValue();
  }
  return !data.isEmpty();
}
/**
 * Implementation of RecordReader::getCurrentValue().
 *
 * @return The value of the current record. (i.e. the target-path.)
 * @throws IOException, on failure.
 * @throws InterruptedException if the delegate read is interrupted.
 */
@Override
public V getCurrentValue() throws IOException, InterruptedException {
  // Pure delegation to the active chunk's reader.
  return chunk.getReader().getCurrentValue();
}
if (chunk.getReader().nextKeyValue()) { ++numRecordsProcessedByThisMap; return true; if (chunk.getReader().nextKeyValue()) { ++numRecordsProcessedByThisMap; return true;
/**
 * Implementation of RecordReader::getCurrentKey().
 *
 * @return The key of the current record. (i.e. the source-path.)
 * @throws IOException on a failure in the delegate reader.
 * @throws InterruptedException if the delegate read is interrupted.
 */
@Override
public K getCurrentKey() throws IOException, InterruptedException {
  // Pure delegation to the active chunk's reader.
  return chunk.getReader().getCurrentKey();
}
/** Releases the wrapped sequence-file reader's resources. */
public synchronized void close() throws IOException {
  sequenceFileRecordReader.close();
}
}
/**
 * Returns the progress fraction reported by the delegate sequence-file reader.
 *
 * @throws IOException on a failure in the delegate reader.
 * @throws InterruptedException if the delegate is interrupted.
 */
public float getProgress() throws IOException, InterruptedException {
  return sequenceFileRecordReader.getProgress();
}
/**
 * Reads forward until a record whose key passes the filter is found.
 *
 * @return true when positioned on an accepted record; false at end of input.
 * @throws IOException on a read failure in the underlying reader.
 * @throws InterruptedException if reading is interrupted.
 */
public synchronized boolean nextKeyValue() throws IOException, InterruptedException {
  for (;;) {
    if (!super.nextKeyValue()) {
      // Underlying input exhausted without another accepted key.
      return false;
    }
    key = super.getCurrentKey();
    if (filter.accept(key)) {
      value = super.getCurrentValue();
      return true;
    }
    // Rejected key: keep scanning.
  }
}