/**
 * Builds exactly {@code minNumSplits} splits over the first configured file path.
 *
 * <p>Split {@code n} is numbered {@code n}, starts at {@code n * linesPerSplit + 1}, has a
 * length of {@code linesPerSplit} and carries no host information ({@code null}).
 *
 * @param minNumSplits the number of splits to create
 * @return an array holding {@code minNumSplits} splits
 * @throws IOException declared by the overridden method
 */
@Override
public FileInputSplit[] createInputSplits(int minNumSplits) throws IOException {
    final FileInputSplit[] result = new FileInputSplit[minNumSplits];
    int splitIndex = 0;
    while (splitIndex < minNumSplits) {
        // The path is looked up per split, exactly once for every created split.
        result[splitIndex] = new FileInputSplit(
                splitIndex,
                getFilePaths()[0],
                splitIndex * linesPerSplit + 1,
                linesPerSplit,
                null);
        splitIndex++;
    }
    return result;
}
/**
 * Writes {@code content} deflate-compressed into a fresh temporary file (suffix
 * {@code tmp.deflate}, deleted on JVM exit) and returns a split covering the whole file.
 *
 * <p>The content is written with {@link DataOutputStream#writeBytes(String)}, i.e. only the
 * low-order byte of every character is stored.
 *
 * @param content the text to compress into the file
 * @return split number 0 spanning offset 0 to the compressed file length, located on
 *     {@code "localhost"}
 * @throws IOException if the file cannot be created or written
 */
private FileInputSplit createTempDeflateFile(String content) throws IOException {
    File tempFile = File.createTempFile("test_contents", "tmp.deflate");
    tempFile.deleteOnExit();

    // try-with-resources closes (and thereby finishes) the deflate stream even when writing fails.
    try (DataOutputStream dos =
            new DataOutputStream(new DeflaterOutputStream(new FileOutputStream(tempFile)))) {
        dos.writeBytes(content);
    }

    // The length is read after the stream is closed so that it reflects the finished file.
    return new FileInputSplit(
            0,
            new Path(tempFile.toURI().toString()),
            0,
            tempFile.length(),
            new String[] {"localhost"});
}
/**
 * Stores {@code contents} in a new temporary file using the named charset and wraps the file
 * in a single split.
 *
 * @param contents the text to write
 * @param charset the name of the charset used to encode {@code contents}
 * @return split number 0 covering the file from offset 0 to its length, with
 *     {@code "localhost"} as the only host name
 * @throws IOException if the file cannot be created or written
 */
static FileInputSplit createTempFile(String contents, String charset) throws IOException {
    final File scratch = File.createTempFile("test_contents", "tmp");
    scratch.deleteOnExit();

    try (Writer writer = new OutputStreamWriter(new FileOutputStream(scratch), charset)) {
        writer.write(contents);
    }

    final String[] hostNames = {"localhost"};
    return new FileInputSplit(
            0, new Path(scratch.toURI().toString()), 0, scratch.length(), hostNames);
}
/**
 * Writes {@code content} UTF-8 encoded into a fresh temporary file (deleted on JVM exit) and
 * returns a split covering the whole file.
 *
 * @param content the text to write
 * @return split number 0 spanning offset 0 to the file length, located on {@code "localhost"}
 * @throws IOException if the file cannot be created or written
 */
private FileInputSplit createTempFile(String content) throws IOException {
    File tempFile = File.createTempFile("test_contents", "tmp");
    tempFile.deleteOnExit();

    // try-with-resources guarantees the writer is closed even when write() throws.
    try (OutputStreamWriter wrt =
            new OutputStreamWriter(new FileOutputStream(tempFile), StandardCharsets.UTF_8)) {
        wrt.write(content);
    }

    // The length is read after closing so that all buffered characters have reached the file.
    return new FileInputSplit(
            0,
            new Path(tempFile.toURI().toString()),
            0,
            tempFile.length(),
            new String[] {"localhost"});
}
/**
 * Stores {@code contents} in a new temporary file and wraps the file in a single split.
 *
 * <p>No charset is passed to the writer, so the text is encoded with the platform default
 * charset.
 *
 * @param contents the text to write
 * @return split number 0 covering the file from offset 0 to its length, with
 *     {@code "localhost"} as the only host name
 * @throws IOException if the file cannot be created or written
 */
static FileInputSplit createTempFile(String contents) throws IOException {
    final File scratch = File.createTempFile("test_contents", "tmp");
    scratch.deleteOnExit();

    try (Writer writer = new OutputStreamWriter(new FileOutputStream(scratch))) {
        writer.write(contents);
    }

    final String[] hostNames = {"localhost"};
    return new FileInputSplit(
            0, new Path(scratch.toURI().toString()), 0, scratch.length(), hostNames);
}
/**
 * Writes {@code content} gzip-compressed into a fresh temporary file (suffix {@code tmp.gz},
 * deleted on JVM exit) and returns a split covering the whole file.
 *
 * <p>The content is written with {@link DataOutputStream#writeBytes(String)}, i.e. only the
 * low-order byte of every character is stored.
 *
 * @param content the text to compress into the file
 * @return split number 0 spanning offset 0 to the compressed file length, located on
 *     {@code "localhost"}
 * @throws IOException if the file cannot be created or written
 */
private FileInputSplit createTempGzipFile(String content) throws IOException {
    File tempFile = File.createTempFile("test_contents", "tmp.gz");
    tempFile.deleteOnExit();

    // try-with-resources closes (and thereby finishes) the gzip stream even when writing fails.
    try (DataOutputStream dos =
            new DataOutputStream(new GZIPOutputStream(new FileOutputStream(tempFile)))) {
        dos.writeBytes(content);
    }

    // The length is read after the stream is closed so that it includes the gzip trailer.
    return new FileInputSplit(
            0,
            new Path(tempFile.toURI().toString()),
            0,
            tempFile.length(),
            new String[] {"localhost"});
}
/**
 * Dumps {@code content} into a new temporary file via a {@link FileWriter} and returns a split
 * that spans the complete file.
 *
 * <p>The {@code FileWriter} is created without a charset, so the platform default applies.
 *
 * @param content the text to write
 * @return split number 0 covering offset 0 up to the file length, with {@code "localhost"} as
 *     the only host name
 * @throws IOException if the file cannot be created or written
 */
private FileInputSplit createInputSplit(String content) throws IOException {
    final File scratch = File.createTempFile("test_contents", "tmp");
    scratch.deleteOnExit();

    try (FileWriter writer = new FileWriter(scratch)) {
        writer.write(content);
    }

    final String[] hostNames = {"localhost"};
    return new FileInputSplit(
            0, new Path(scratch.toURI().toString()), 0, scratch.length(), hostNames);
}
/**
 * Writes {@code content} UTF-8 encoded into a fresh temporary file (deleted on JVM exit) and
 * returns a split covering the whole file.
 *
 * @param content the text to write
 * @return split number 0 spanning offset 0 to the file length, located on {@code "localhost"}
 * @throws IOException if the file cannot be created or written
 */
private static FileInputSplit createTempFile(String content) throws IOException {
    File tempFile = File.createTempFile("test_contents", "tmp");
    tempFile.deleteOnExit();

    // try-with-resources guarantees the writer is closed even when write() throws.
    try (OutputStreamWriter wrt =
            new OutputStreamWriter(new FileOutputStream(tempFile), StandardCharsets.UTF_8)) {
        wrt.write(content);
    }

    // The length is read after closing so that all buffered characters have reached the file.
    return new FileInputSplit(
            0,
            new Path(tempFile.toURI().toString()),
            0,
            tempFile.length(),
            new String[] {"localhost"});
}
@Override public FileInputSplit[] createInputSplits(int minNumSplits) throws IOException { final List<FileStatus> files = this.getFiles(); final List<FileInputSplit> inputSplits = new ArrayList<FileInputSplit>(minNumSplits); for (FileStatus file : files) { final FileSystem fs = file.getPath().getFileSystem(); final long blockSize = this.blockSize == NATIVE_BLOCK_SIZE ? fs.getDefaultBlockSize() : this.blockSize; for (long pos = 0, length = file.getLen(); pos < length; pos += blockSize) { long remainingLength = Math.min(pos + blockSize, length) - pos; // get the block locations and make sure they are in order with respect to their offset final BlockLocation[] blocks = fs.getFileBlockLocations(file, pos, remainingLength); Arrays.sort(blocks); inputSplits.add(new FileInputSplit(inputSplits.size(), file.getPath(), pos, remainingLength, blocks[0].getHosts())); } } if (inputSplits.size() < minNumSplits) { LOG.warn(String.format( "With the given block size %d, the files %s cannot be split into %d blocks. Filling up with empty splits...", blockSize, Arrays.toString(getFilePaths()), minNumSplits)); FileStatus last = files.get(files.size() - 1); final BlockLocation[] blocks = last.getPath().getFileSystem().getFileBlockLocations(last, 0, last.getLen()); for (int index = files.size(); index < minNumSplits; index++) { inputSplits.add(new FileInputSplit(index, last.getPath(), last.getLen(), 0, blocks[0].getHosts())); } } return inputSplits.toArray(new FileInputSplit[inputSplits.size()]); }
// Four separate split-construction excerpts; every one numbers its split with splitNum++ and
// appends it to inputSplits:
//   1. a split starting at 0 whose length is READ_WHOLE_SPLIT_FLAG, with the collected hosts;
//   2. a split of splitSize bytes at position, using the hosts of blocks[blockIndex];
//   3. a split of bytesUnassigned bytes at position, using the hosts of blocks[blockIndex];
//   4. an empty split (start 0, length 0) with an empty host array.
// NOTE(review): 'fis' is declared more than once, so these statements presumably come from
// different branches/scopes of the enclosing code — confirm against the full method.
len = READ_WHOLE_SPLIT_FLAG; FileInputSplit fis = new FileInputSplit(splitNum++, file.getPath(), 0, len, hosts.toArray(new String[hosts.size()])); inputSplits.add(fis); FileInputSplit fis = new FileInputSplit(splitNum++, file.getPath(), position, splitSize, blocks[blockIndex].getHosts()); inputSplits.add(fis); final FileInputSplit fis = new FileInputSplit(splitNum++, file.getPath(), position, bytesUnassigned, blocks[blockIndex].getHosts()); inputSplits.add(fis); hosts = new String[0]; final FileInputSplit fis = new FileInputSplit(splitNum++, file.getPath(), 0, 0, hosts); inputSplits.add(fis);
// Split number 0 covering the file from 'offset' up to its end (length = file length minus
// offset); no host names are supplied (null).
FileInputSplit split = new FileInputSplit(0, file.getPath(), offset, file.getLen() - offset, null);
// Split number 0 covering the whole temp file (offset 0, length = current file length), with
// "localhost" as the only host name.
FileInputSplit split = new FileInputSplit(0, new Path(tempFile.toURI().toString()), 0, tempFile.length(), new String[]{ "localhost" });
// Divides 'split' in two: split1 covers the first half (offset 0, length = half of the
// original length); split2 starts where split1 ends.
// NOTE(review): split2 is given the full original length although it starts at the midpoint,
// so it nominally extends past the end of the original split — presumably the remainder
// (split.getLength() - split1.getLength()) was intended; confirm.
FileInputSplit split1 = new FileInputSplit(0, split.getPath(), 0, split.getLength() / 2, split.getHostnames()); FileInputSplit split2 = new FileInputSplit(1, split.getPath(), split1.getLength(), split.getLength(), split.getHostnames());
// Writes myString to a temp file, then divides the resulting split in two: split1 covers the
// first half (offset 0, length = half of the original length); split2 starts where split1 ends.
// NOTE(review): split2 is given the full original length although it starts at the midpoint,
// so it nominally extends past the end of the file — presumably the remainder
// (split.getLength() - split1.getLength()) was intended; confirm.
final FileInputSplit split = createTempFile(myString); FileInputSplit split1 = new FileInputSplit(0, split.getPath(), 0, split.getLength() / 2, split.getHostnames()); FileInputSplit split2 = new FileInputSplit(1, split.getPath(), split1.getLength(), split.getLength(), split.getHostnames());
/**
 * Tests that the records are read correctly when the split boundary is in the middle of a
 * record.
 *
 * <p>The 20-character input is cut at its midpoint, which lies inside {@code "value2"}. The
 * first split is expected to deliver {@code value1} and the complete {@code value2}; the second
 * split is expected to deliver only {@code value3}.
 */
@Test
public void testReadOverSplitBoundariesUnaligned() throws IOException {
    final String myString = "value1\nvalue2\nvalue3";
    final FileInputSplit split = createTempFile(myString);

    FileInputSplit split1 =
            new FileInputSplit(
                    0, split.getPath(), 0, split.getLength() / 2, split.getHostnames());
    // The second split covers exactly the remainder of the file. Passing the full file length
    // here would describe a range that reaches beyond the end of the file.
    FileInputSplit split2 =
            new FileInputSplit(
                    1,
                    split.getPath(),
                    split1.getLength(),
                    split.getLength() - split1.getLength(),
                    split.getHostnames());

    final Configuration parameters = new Configuration();
    format.configure(parameters);

    // First split: reads over its boundary to finish the record that starts inside it.
    format.open(split1);
    assertEquals("value1", format.nextRecord(null));
    assertEquals("value2", format.nextRecord(null));
    assertNull(format.nextRecord(null));
    assertTrue(format.reachedEnd());
    format.close();

    // Second split: the partial record at its start belongs to the first split.
    format.open(split2);
    assertEquals("value3", format.nextRecord(null));
    assertNull(format.nextRecord(null));
    assertTrue(format.reachedEnd());
    format.close();
}
/**
 * Adds one split spanning the entire given file to {@code inputSplits}.
 *
 * <p>The split starts at offset 0 and lists the distinct hosts of all blocks of the file. Its
 * length is the file length, or {@code READ_WHOLE_SPLIT_FLAG} when
 * {@code testForUnsplittable(file)} returns {@code true}.
 *
 * <p>NOTE(review): the split number comes from {@code incrementAndGet()}, i.e. the value
 * <em>after</em> incrementing — confirm this matches the numbering expected by callers.
 *
 * @param file the file to create the split for
 * @param inputSplits the queue receiving the new split
 * @param splitNumber the shared counter that supplies the split number
 * @throws IOException if the file system or block locations cannot be obtained
 */
private void createSplitForUnSplittableFile(
        LocatedFileStatus file,
        BlockingQueue<FileInputSplit> inputSplits,
        AtomicInteger splitNumber)
        throws IOException {
    final FileSystem fileSystem = file.getPath().getFileSystem();
    final BlockLocation[] locations = fileSystem.getFileBlockLocations(file, 0, file.getLen());

    // Gather every host that stores at least one block; the set removes duplicates.
    final Set<String> hostNames = new HashSet<>();
    for (int i = 0; i < locations.length; i++) {
        hostNames.addAll(Arrays.asList(locations[i].getHosts()));
    }

    final long fileLength = file.getLen();
    final long splitLength = testForUnsplittable(file) ? READ_WHOLE_SPLIT_FLAG : fileLength;

    inputSplits.add(
            new FileInputSplit(
                    splitNumber.incrementAndGet(),
                    file.getPath(),
                    0,
                    splitLength,
                    hostNames.toArray(new String[0])));
}
/**
 * Reads the {@code testdata/genesis.blk} resource through
 * {@link BitcoinTransactionFlinkInputFormat} and checks that exactly one transaction is
 * returned.
 *
 * <p>The split starts at offset 0 with length {@code -1} and no host information.
 */
@Test
public void parseBitcoinTransaction() throws HadoopCryptoLedgerConfigurationException, IOException {
    ClassLoader classLoader = getClass().getClassLoader();
    String fileName = "genesis.blk";
    String fileNameBlock = classLoader.getResource("testdata/" + fileName).getFile();
    Path file = new Path(fileNameBlock);
    FileInputSplit blockInputSplit = new FileInputSplit(0, file, 0, -1, null);
    BitcoinTransactionFlinkInputFormat inputFormat =
            new BitcoinTransactionFlinkInputFormat(1024 * 1024, "F9BEB4D9", false);
    inputFormat.open(blockInputSplit);
    try {
        assertFalse(inputFormat.reachedEnd(), "End not reached");
        BitcoinTransaction reuse = new BitcoinTransaction();
        int transactCount = 0;
        // Count records until the format reports its end or stops returning records.
        while (!inputFormat.reachedEnd() && (inputFormat.nextRecord(reuse) != null)) {
            transactCount++;
        }
        assertEquals(1, transactCount, "Genesis Block must contain exactly one transactions");
    } finally {
        // Release the opened input even when an assertion fails.
        inputFormat.close();
    }
}
}
/**
 * Reads the {@code testdata/eth1346406.bin} resource through
 * {@link EthereumBlockFlinkInputFormat} and checks that it yields exactly one block that
 * contains 6 transactions.
 *
 * <p>The split starts at offset 0 with length {@code -1} and no host information.
 */
@Test
public void parseEthereumBlock1346406() throws IOException {
    ClassLoader classLoader = getClass().getClassLoader();
    String fileName = "eth1346406.bin";
    String fileNameBlock = classLoader.getResource("testdata/" + fileName).getFile();
    Path file = new Path(fileNameBlock);
    FileInputSplit blockInputSplit = new FileInputSplit(0, file, 0, -1, null);
    EthereumBlockFlinkInputFormat inputFormat = new EthereumBlockFlinkInputFormat(1024 * 1024, false);
    inputFormat.open(blockInputSplit);
    try {
        assertFalse(inputFormat.reachedEnd(), "End not reached");
        EthereumBlock reuse = new EthereumBlock();
        EthereumBlock nextBlock = inputFormat.nextRecord(reuse);
        assertNotNull(nextBlock, "First Block returned");
        assertEquals(6, nextBlock.getEthereumTransactions().size(), "First block contains exactly 6 transactions");
        nextBlock = inputFormat.nextRecord(reuse);
        assertNull(nextBlock, "No further block");
        assertTrue(inputFormat.reachedEnd(), "End reached");
    } finally {
        // Release the opened input even when an assertion fails.
        inputFormat.close();
    }
}
/**
 * Reads the {@code testdata/genesis.blk} resource through
 * {@link BitcoinRawBlockFlinkInputFormat} and checks that it yields exactly one raw block of
 * 293 bytes.
 *
 * <p>The split starts at offset 0 with length {@code -1} and no host information.
 */
@Test
public void parseBitcoinRawBlock() throws HadoopCryptoLedgerConfigurationException, IOException {
    ClassLoader classLoader = getClass().getClassLoader();
    String fileName = "genesis.blk";
    String fileNameBlock = classLoader.getResource("testdata/" + fileName).getFile();
    Path file = new Path(fileNameBlock);
    FileInputSplit blockInputSplit = new FileInputSplit(0, file, 0, -1, null);
    BitcoinRawBlockFlinkInputFormat inputFormat =
            new BitcoinRawBlockFlinkInputFormat(1024 * 1024, "F9BEB4D9", false);
    inputFormat.open(blockInputSplit);
    try {
        assertFalse(inputFormat.reachedEnd(), "End not reached");
        BytesWritable reuse = new BytesWritable();
        BytesWritable nextBlock = inputFormat.nextRecord(reuse);
        assertNotNull(nextBlock, "First Block returned");
        assertEquals(293, nextBlock.getLength(), "First Block must have size of 293");
        nextBlock = inputFormat.nextRecord(reuse);
        assertNull(nextBlock, "No further block");
        assertTrue(inputFormat.reachedEnd(), "End reached");
    } finally {
        // Release the opened input even when an assertion fails.
        inputFormat.close();
    }
}
/**
 * Reads the {@code testdata/genesis.blk} resource through
 * {@link BitcoinBlockFlinkInputFormat} and checks that it yields exactly one block that
 * contains a single transaction.
 *
 * <p>The split starts at offset 0 with length {@code -1} and no host information.
 */
@Test
public void parseBitcoinBlockGenesis() throws HadoopCryptoLedgerConfigurationException, IOException {
    ClassLoader classLoader = getClass().getClassLoader();
    String fileName = "genesis.blk";
    String fileNameBlock = classLoader.getResource("testdata/" + fileName).getFile();
    Path file = new Path(fileNameBlock);
    FileInputSplit blockInputSplit = new FileInputSplit(0, file, 0, -1, null);
    BitcoinBlockFlinkInputFormat inputFormat =
            new BitcoinBlockFlinkInputFormat(1024 * 1024, "F9BEB4D9", false);
    inputFormat.open(blockInputSplit);
    try {
        assertFalse(inputFormat.reachedEnd(), "End not reached");
        BitcoinBlock reuse = new BitcoinBlock();
        BitcoinBlock nextBlock = inputFormat.nextRecord(reuse);
        assertNotNull(nextBlock, "First Block returned");
        assertEquals(1, nextBlock.getTransactions().size(), "First block contains exactly one transction");
        nextBlock = inputFormat.nextRecord(reuse);
        assertNull(nextBlock, "No further block");
        assertTrue(inputFormat.reachedEnd(), "End reached");
    } finally {
        // Release the opened input even when an assertion fails.
        inputFormat.close();
    }
}