@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
    while (reader.nextKeyValue()) {
        // TODO titan05 integration -- the duplicate() call may be unnecessary
        final TinkerVertex maybeNullTinkerVertex =
                deser.readHadoopVertex(reader.getCurrentKey(), reader.getCurrentValue());
        if (null != maybeNullTinkerVertex) {
            vertex = new VertexWritable(maybeNullTinkerVertex);
            //vertexQuery.filterRelationsOf(vertex); // TODO reimplement vertexquery filtering
            return true;
        }
    }
    return false;
}
@Override
public void open(HadoopInputSplit split) throws IOException {
    // enforce sequential open() calls
    synchronized (OPEN_MUTEX) {
        TaskAttemptContext context = new TaskAttemptContextImpl(configuration, new TaskAttemptID());
        try {
            this.recordReader = this.mapreduceInputFormat
                    .createRecordReader(split.getHadoopInputSplit(), context);
            this.recordReader.initialize(split.getHadoopInputSplit(), context);
        } catch (InterruptedException e) {
            throw new IOException("Could not create RecordReader.", e);
        } finally {
            this.fetched = false;
        }
    }
}
@Override
public boolean hasNext() {
    try {
        boolean retVal = curRecReader.nextKeyValue();
        if (retVal) {
            return true;
        }
        // if it returned false, we need to close the recordReader
        curRecReader.close();
        return false;
    } catch (IOException | InterruptedException e) {
        throw new RuntimeException(e);
    }
}
@Override
public Tuple2<K, V> nextRecord(Tuple2<K, V> record) throws IOException {
    if (!this.fetched) {
        fetchNext();
    }
    if (!this.hasNext) {
        return null;
    }
    try {
        record.f0 = recordReader.getCurrentKey();
        record.f1 = recordReader.getCurrentValue();
    } catch (InterruptedException e) {
        throw new IOException("Could not get KeyValue pair.", e);
    }
    this.fetched = false;
    return record;
}
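nextRecord() above relies on a fetchNext() helper and the hasNext/fetched flags that are not shown in this result; a minimal sketch of what such a helper could look like, inferred from the fields used above (an assumption, not the original source):

// Assumed helper: pre-fetches the next key/value pair so nextRecord() can
// distinguish "nothing fetched yet" from "no more records".
protected void fetchNext() throws IOException {
    try {
        this.hasNext = this.recordReader.nextKeyValue();
    } catch (InterruptedException e) {
        throw new IOException("Could not fetch next KeyValue pair.", e);
    } finally {
        this.fetched = true;
    }
}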
@Test
public void readBitcoinRawBlockInputFormatBlockVersion1() throws IOException, InterruptedException {
    Configuration conf = new Configuration(defaultConf);
    ClassLoader classLoader = getClass().getClassLoader();
    String fileName = "version1.blk";
    String fileNameBlock = classLoader.getResource("testdata/" + fileName).getFile();
    Path file = new Path(fileNameBlock);
    Job job = Job.getInstance(conf);
    FileInputFormat.setInputPaths(job, file);
    BitcoinRawBlockFileInputFormat format = new BitcoinRawBlockFileInputFormat();
    List<InputSplit> splits = format.getSplits(job);
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    assertEquals(1, splits.size(), "Only one split generated for block version 1");
    RecordReader<BytesWritable, BytesWritable> reader = format.createRecordReader(splits.get(0), context);
    assertNotNull(reader, "Format returned null RecordReader");
    reader.initialize(splits.get(0), context);
    BytesWritable key = new BytesWritable();
    BytesWritable block = new BytesWritable();
    assertTrue(reader.nextKeyValue(), "Input Split for block version contains at least one block");
    block = reader.getCurrentValue();
    assertEquals(482, block.getLength(), "Random block version 1 must have size of 482 bytes");
    assertFalse(reader.nextKeyValue(), "No further blocks in block version 1");
    reader.close();
}
expect(inputSplit.getPath()).andReturn(new Path("/path/to/an/avro/file")).anyTimes();
expect(inputSplit.getStart()).andReturn(0L).anyTimes();
expect(inputSplit.getLength()).andReturn(avroFileInput.length()).anyTimes();
expect(context.getConfiguration()).andReturn(conf).anyTimes();

recordReader.initialize(inputSplit, context);
assertEquals(0.0f, recordReader.getProgress(), 0.0f);

assertTrue("Expected at least one record", recordReader.nextKeyValue());
key = recordReader.getCurrentKey();
value = recordReader.getCurrentValue();
assertTrue("getCurrentKey() returned different keys for the same record",
    key == recordReader.getCurrentKey());
assertTrue("getCurrentValue() returned different values for the same record",
    value == recordReader.getCurrentValue());

assertTrue("Expected to read a second record", recordReader.nextKeyValue());
key = recordReader.getCurrentKey();
value = recordReader.getCurrentValue();

assertEquals(1.0f, recordReader.getProgress(), 0.0f);
assertFalse("Expected only 2 records", recordReader.nextKeyValue());

recordReader.close();
private void writeThenReadByRecordReader(int intervalRecordCount, int writeCount,
        int splitNumber, long maxSplitSize, CompressionCodec codec)
        throws IOException, InterruptedException {
    Path testDir = new Path(System.getProperty("test.tmp.dir", ".") + "/mapred/testsmallfirstsplit");
    Path testFile = new Path(testDir, "test_rcfile");
    fs.delete(testFile, true);
    Configuration cloneConf = new Configuration(conf);
    // ... write writeCount rows into testFile with the given codec and interval (not shown) ...

    Configuration jonconf = new Configuration(cloneConf);
    jonconf.set("mapred.input.dir", testDir.toString());
    JobContext context = new Job(jonconf);
    HiveConf.setLongVar(context.getConfiguration(), HiveConf.ConfVars.MAPREDMAXSPLITSIZE, maxSplitSize);
    List<InputSplit> splits = inputFormat.getSplits(context);
    assertEquals("splits length should be " + splitNumber, splitNumber, splits.size());

    int readCount = 0;
    for (int i = 0; i < splits.size(); i++) {
        TaskAttemptContext tac = new TaskAttemptContextImpl(jonconf, new TaskAttemptID());
        RecordReader<LongWritable, BytesRefArrayWritable> rr =
            inputFormat.createRecordReader(splits.get(i), tac);
        rr.initialize(splits.get(i), tac);
        while (rr.nextKeyValue()) {
            readCount++;
        }
    }
    assertEquals("readCount should equal writeCount", writeCount, readCount);
}
private static List<Text> readSplit(KeyValueTextInputFormat format, InputSplit split, Job job)
        throws IOException, InterruptedException {
    List<Text> result = new ArrayList<Text>();
    Configuration conf = job.getConfiguration();
    TaskAttemptContext context = MapReduceTestUtil.createDummyMapTaskAttemptContext(conf);
    RecordReader<Text, Text> reader = format.createRecordReader(split,
        MapReduceTestUtil.createDummyMapTaskAttemptContext(conf));
    MapContext<Text, Text, Text, Text> mcontext =
        new MapContextImpl<Text, Text, Text, Text>(conf, context.getTaskAttemptID(),
            reader, null, null, MapReduceTestUtil.createDummyReporter(), split);
    reader.initialize(split, mcontext);
    while (reader.nextKeyValue()) {
        result.add(new Text(reader.getCurrentValue()));
    }
    return result;
}
@Test
public void readExcelInputFormatExcel2013SingleSheetEncryptedNegative() throws IOException, InterruptedException {
    Configuration conf = new Configuration(defaultConf);
    ClassLoader classLoader = getClass().getClassLoader();
    String fileName = "excel2013encrypt.xlsx";
    String fileNameSpreadSheet = classLoader.getResource(fileName).getFile();
    Path file = new Path(fileNameSpreadSheet);
    // set locale to the one of the test data
    conf.set("hadoopoffice.read.locale.bcp47", "de");
    // for decryption simply set the password
    conf.set("hadoopoffice.read.security.crypt.password", "test2");
    Job job = Job.getInstance(conf);
    FileInputFormat.setInputPaths(job, file);
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    ExcelFileInputFormat format = new ExcelFileInputFormat();
    List<InputSplit> splits = format.getSplits(job);
    assertEquals(1, splits.size(), "Only one split generated for Excel file");
    RecordReader<Text, ArrayWritable> reader = format.createRecordReader(splits.get(0), context);
    InterruptedException ex = assertThrows(InterruptedException.class,
        () -> reader.initialize(splits.get(0), context),
        "Exception is thrown in case of wrong password");
}
private void runCDXTest(Configuration conf, String expected) throws Exception {
    File testFile = new File("src/test/resources/rr-test-inputs.txt");
    Path path = new Path(testFile.getAbsoluteFile().toURI().toString());
    FileSplit split = new FileSplit(path, 0, testFile.length(), null);

    ArchiveToCDXFileInputFormat inputFormat = ReflectionUtils
            .newInstance(ArchiveToCDXFileInputFormat.class, conf);
    TaskAttemptContext context = new TaskAttemptContext(conf, new TaskAttemptID());
    RecordReader<Text, Text> reader = inputFormat.createRecordReader(split, context);
    reader.initialize(split, context);

    int position = 0;
    String value = "";
    while (reader.nextKeyValue()) {
        position += 1;
        if (position == 3)
            value = reader.getCurrentValue().toString();
    }
    // Check the third value is as expected
    log.debug(value);
    Assert.assertEquals(expected, value);
}
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    LOG.debug("initialize({}, {})", genericSplit, context);

    // Assuming file split
    if (!(genericSplit instanceof FileSplit))
        throw new IOException("This record reader only supports FileSplit inputs");

    // Find RDF language
    FileSplit split = (FileSplit) genericSplit;
    Path path = split.getPath();
    Lang lang = RDFLanguages.filenameToLang(path.getName());
    if (lang == null)
        throw new IOException("There is no registered RDF language for the input file " + path.toString());

    // Select the record reader and initialize
    this.reader = this.selectRecordReader(lang);
    this.reader.initialize(split, context);
}
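The selectRecordReader(lang) call above is not shown in this result; a minimal sketch of how such a dispatch could look, using hypothetical reader classes and an assumed value type (only the RDFLanguages/Lang API is real Jena):

// Hypothetical dispatch helper -- the reader class names and the TripleWritable
// value type are illustrative assumptions, not taken from the original source.
private RecordReader<LongWritable, TripleWritable> selectRecordReader(Lang lang) throws IOException {
    if (RDFLanguages.NTRIPLES.equals(lang)) {
        return new NTriplesRecordReader();   // hypothetical line-oriented reader
    } else if (RDFLanguages.TURTLE.equals(lang)) {
        return new TurtleRecordReader();     // hypothetical whole-file reader
    }
    throw new IOException("No record reader available for RDF language " + lang.getName());
}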
@Override
public String getCurrentValue() throws IOException, InterruptedException {
    Text text = delegate.getCurrentValue();
    return text == null ? null : text.toString();
}
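This getCurrentValue() override converts the delegate's Text into a String; a minimal sketch of the kind of delegating wrapper it could belong to, assuming a Text-valued underlying reader (the class name and key type are illustrative, not from the original source):

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

// Hypothetical wrapper: exposes a RecordReader<LongWritable, Text> as a
// RecordReader<LongWritable, String> by delegating every call.
public class StringValueRecordReader extends RecordReader<LongWritable, String> {

    private final RecordReader<LongWritable, Text> delegate;

    public StringValueRecordReader(RecordReader<LongWritable, Text> delegate) {
        this.delegate = delegate;
    }

    @Override
    public void initialize(InputSplit split, TaskAttemptContext context)
            throws IOException, InterruptedException {
        delegate.initialize(split, context);
    }

    @Override
    public boolean nextKeyValue() throws IOException, InterruptedException {
        return delegate.nextKeyValue();
    }

    @Override
    public LongWritable getCurrentKey() throws IOException, InterruptedException {
        return delegate.getCurrentKey();
    }

    @Override
    public String getCurrentValue() throws IOException, InterruptedException {
        Text text = delegate.getCurrentValue();
        return text == null ? null : text.toString();
    }

    @Override
    public float getProgress() throws IOException, InterruptedException {
        return delegate.getProgress();
    }

    @Override
    public void close() throws IOException {
        delegate.close();
    }
}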
@Test
public void testRecordReader() throws Exception {
    List<String> paths = Lists.newArrayList("/path1", "/path2");
    GobblinWorkUnitsInputFormat.GobblinSplit split = new GobblinWorkUnitsInputFormat.GobblinSplit(paths);

    GobblinWorkUnitsInputFormat inputFormat = new GobblinWorkUnitsInputFormat();
    RecordReader<LongWritable, Text> recordReader =
        inputFormat.createRecordReader(split, new TaskAttemptContextImpl(new Configuration(),
            new TaskAttemptID("a", 1, TaskType.MAP, 1, 1)));

    recordReader.nextKeyValue();
    Assert.assertEquals(recordReader.getCurrentKey().get(), 0);
    Assert.assertEquals(recordReader.getCurrentValue().toString(), "/path1");
    recordReader.nextKeyValue();
    Assert.assertEquals(recordReader.getCurrentKey().get(), 1);
    Assert.assertEquals(recordReader.getCurrentValue().toString(), "/path2");
    Assert.assertFalse(recordReader.nextKeyValue());
}
job.getConfiguration().getBoolean(SNAPSHOT_INPUTFORMAT_LOCALITY_ENABLED_KEY,
    SNAPSHOT_INPUTFORMAT_LOCALITY_ENABLED_DEFAULT);
when(taskAttemptContext.getConfiguration()).thenReturn(job.getConfiguration());

RecordReader<ImmutableBytesWritable, Result> rr =
    tsif.createRecordReader(split, taskAttemptContext);
rr.initialize(split, taskAttemptContext);

while (rr.nextKeyValue()) {
    byte[] row = rr.getCurrentKey().get();
    verifyRowFromMap(rr.getCurrentKey(), rr.getCurrentValue());
    rowTracker.addRow(row);
}

rr.close();
/**
 * Simple case for record readers.
 * @throws Exception if failed
 */
@Test
public void reader_simple() throws Exception {
    Configuration conf = new ConfigurationProvider().newInstance();
    FileStatus stat = write(conf, 1);
    try (RecordReader<NullWritable, Text> reader = TemporaryInputFormat.createRecordReader()) {
        reader.initialize(
            new FileSplit(stat.getPath(), 0, stat.getLen(), null),
            new TaskAttemptContextImpl(conf, new TaskAttemptID()));
        assertThat(reader.nextKeyValue(), is(true));
        assertThat(reader.getCurrentValue(), is(new Text("Hello, world!")));
        assertThat(reader.nextKeyValue(), is(false));
        assertThat((double) reader.getProgress(), closeTo(1.0, 0.01));
    }
}
public void run() {
    long records = 0;
    try {
        TaskAttemptContext context = new TaskAttemptContextImpl(
            job.getConfiguration(), new TaskAttemptID());
        RecordReader<Text, Text> reader =
            inFormat.createRecordReader(splits.get(sampleStep * idx), context);
        reader.initialize(splits.get(sampleStep * idx), context);
        while (reader.nextKeyValue()) {
            sampler.addKey(new Text(reader.getCurrentKey()));
            records += 1;
            if (recordsPerSample <= records) {
                break;
            }
        }
    } catch (IOException ie) {
        System.err.println("Got an exception while reading splits " +
            StringUtils.stringifyException(ie));
        throw new RuntimeException(ie);
    } catch (InterruptedException e) {
    }
}
/**
 * Verifies that a non-null record reader can be created, and the key/value types are
 * as expected.
 */
@Test
public void testCreateRecordReader() throws IOException, InterruptedException {
    // Set up the job configuration.
    Job job = new Job();
    AvroJob.setInputKeySchema(job, Schema.create(Schema.Type.STRING));
    Configuration conf = job.getConfiguration();

    FileSplit inputSplit = createMock(FileSplit.class);
    TaskAttemptContext context = createMock(TaskAttemptContext.class);
    expect(context.getConfiguration()).andReturn(conf).anyTimes();

    replay(inputSplit);
    replay(context);

    AvroKeyInputFormat inputFormat = new AvroKeyInputFormat();
    @SuppressWarnings("unchecked")
    RecordReader<AvroKey<Object>, NullWritable> recordReader =
        inputFormat.createRecordReader(inputSplit, context);
    assertNotNull(recordReader);
    recordReader.close();

    verify(inputSplit);
    verify(context);
}
public Object[] getSample(InputFormat inf, Job job) throws IOException, InterruptedException {
    long counter = 0;
    List<InputSplit> splits = inf.getSplits(job);
    ArrayList<K> samples = new ArrayList<K>(numSamples);
    int splitsToSample = Math.min(maxSplitsSampled, splits.size());
    Random r = new Random();

    for (int i = 0; i < splitsToSample; i++) {
        TaskAttemptContext samplingContext = new TaskAttemptContextImpl(
            job.getConfiguration(), new TaskAttemptID());
        RecordReader<K, V> reader = inf.createRecordReader(splits.get(i), samplingContext);
        reader.initialize(splits.get(i), samplingContext);
        while (reader.nextKeyValue()) {
            if (r.nextDouble() <= freq) {
                if (samples.size() < numSamples) {
                    LOG.info(String.format("Fill: Collected %d samples from %d splits", counter, i));
                    counter++;
                    samples.add(ReflectionUtils.copy(job.getConfiguration(),
                        reader.getCurrentKey(), null));
                } else {
                    // replace a randomly chosen existing sample
                    int ind = r.nextInt(numSamples);
                    samples.set(ind, ReflectionUtils.copy(job.getConfiguration(),
                        reader.getCurrentKey(), null));
                    counter++;
                    if (counter % 1000 == 0) {
                        LOG.info(String.format("Replace Random: Collected %d samples from %d splits", counter, i));
                    }
                }
            }
        }
        reader.close();
    }
    return samples.toArray();
}
@Override
public void initialize(final InputSplit inputSplit, final TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
    reader.initialize(inputSplit, taskAttemptContext);
    final Configuration conf = taskAttemptContext.getConfiguration();
    if (conf.get(Constants.GREMLIN_HADOOP_GRAPH_FILTER, null) != null) {
        graphFilter = VertexProgramHelper.deserialize(
            ConfUtil.makeApacheConfiguration(conf), Constants.GREMLIN_HADOOP_GRAPH_FILTER);
    }
}
Job job = new Job(jobConf);
TaskAttemptContext tac = ShimLoader.getHadoopShims()
    .newTaskAttemptContext(job.getConfiguration(), reporter);
recordReader = createRecordReader(tableSplit, tac);
try {
    recordReader.initialize(tableSplit, tac);
} catch (InterruptedException e) {
    throw new IOException("Failed to initialize RecordReader", e);
}