@Override public List<InputSplit> getSplits(JobContext ctx) throws IOException, InterruptedException {
    List<InputSplit> res = new ArrayList<>(BLOCK_CNT);

    for (int i = 0; i < BLOCK_CNT; i++)
        try {
            res.add(new FileSplit(new Path(new URI("someFile")), i, i + 1, new String[] {"localhost"}));
        }
        catch (URISyntaxException e) {
            throw new IOException(e);
        }

    return res;
}
/**
 * Returns a split for each store files directory using the block location
 * of each file as locality reference.
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
  List<InputSplit> splits = new ArrayList<>();
  List<FileStatus> files = listStatus(job);

  Text key = new Text();
  for (FileStatus file: files) {
    Path path = file.getPath();
    FileSystem fs = path.getFileSystem(job.getConfiguration());
    LineReader reader = new LineReader(fs.open(path));
    long pos = 0;
    int n;
    try {
      while ((n = reader.readLine(key)) > 0) {
        String[] hosts = getStoreDirHosts(fs, path);
        splits.add(new FileSplit(path, pos, n, hosts));
        pos += n;
      }
    } finally {
      reader.close();
    }
  }
  return splits;
}
/**
 * @param clsName Input split class name.
 * @param in Input stream.
 * @param hosts Optional hosts.
 * @return File block or {@code null} if it is not a {@link FileSplit} instance.
 * @throws IgniteCheckedException If failed.
 */
public static HadoopFileBlock readFileBlock(String clsName, DataInput in, @Nullable String[] hosts)
    throws IgniteCheckedException {
    if (!FileSplit.class.getName().equals(clsName))
        return null;

    FileSplit split = new FileSplit();

    try {
        split.readFields(in);
    }
    catch (IOException e) {
        throw new IgniteCheckedException(e);
    }

    if (hosts == null)
        hosts = EMPTY_HOSTS;

    return new HadoopFileBlock(hosts, split.getPath().toUri(), split.getStart(), split.getLength());
}
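A minimal sketch of how readFileBlock() above might be driven, assuming the surrounding Ignite types (HadoopFileBlock, IgniteCheckedException) are on the classpath; the path, length, and host name are made up:

// Hypothetical round trip: write a FileSplit through its Writable contract,
// then rebuild it as a HadoopFileBlock. Exceptions propagate to the caller.
FileSplit src = new FileSplit(new Path("/some/file"), 0, 1024, new String[] {"host1"});

ByteArrayOutputStream bos = new ByteArrayOutputStream();
src.write(new DataOutputStream(bos));     // FileSplit serializes path, start and length; hosts are not written

DataInput in = new DataInputStream(new ByteArrayInputStream(bos.toByteArray()));

HadoopFileBlock block = readFileBlock(FileSplit.class.getName(), in, src.getLocations());
// block.file(), block.start() and block.length() now mirror the original split.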
/** {@inheritDoc} */
@Override public InputSplit getInputSplit() {
    if (inputSplit == null) {
        HadoopInputSplit split = ctx.taskInfo().inputSplit();

        if (split == null)
            return null;

        if (split instanceof HadoopFileBlock) {
            HadoopFileBlock fileBlock = (HadoopFileBlock)split;

            inputSplit = new FileSplit(new Path(fileBlock.file()), fileBlock.start(), fileBlock.length(), null);
        }
        else {
            try {
                inputSplit = (InputSplit)((HadoopV2TaskContext)ctx).getNativeSplit(split);
            }
            catch (IgniteCheckedException e) {
                throw new IllegalStateException(e);
            }
        }
    }

    return inputSplit;
}
@Override
public void initialize(InputSplit unusedSplit, TaskAttemptContext cx)
    throws IOException, InterruptedException {
  super.initialize(
      new FileSplit(this.split.getPath(this.idx),
                    this.split.getOffset(this.idx),
                    this.split.getLength(this.idx),
                    null),
      cx);
}
/**
 * Tests serialization of wrapper and the wrapped native split.
 *
 * @throws Exception If fails.
 */
@Test
public void testSerialization() throws Exception {
    FileSplit nativeSplit = new FileSplit(new Path("/path/to/file"), 100, 500, new String[] {"host1", "host2"});

    assertEquals("/path/to/file:100+500", nativeSplit.toString());

    HadoopSplitWrapper split = HadoopUtils.wrapSplit(10, nativeSplit, nativeSplit.getLocations());

    assertEquals("[host1, host2]", Arrays.toString(split.hosts()));

    ByteArrayOutputStream buf = new ByteArrayOutputStream();

    ObjectOutput out = new ObjectOutputStream(buf);

    out.writeObject(split);

    ObjectInput in = new ObjectInputStream(new ByteArrayInputStream(buf.toByteArray()));

    final HadoopSplitWrapper res = (HadoopSplitWrapper)in.readObject();

    assertEquals("/path/to/file:100+500", HadoopUtils.unwrapSplit(res).toString());

    GridTestUtils.assertThrows(log, new Callable<Object>() {
        @Override public Object call() throws Exception {
            res.hosts();

            return null;
        }
    }, AssertionError.class, null);
}
private static SortedSet<byte[]> readFileToSearch(final Configuration conf, final FileSystem fs,
    final LocatedFileStatus keyFileStatus) throws IOException, InterruptedException {
  SortedSet<byte[]> result = new TreeSet<>(Bytes.BYTES_COMPARATOR);
  // Return entries that are flagged Counts.UNDEFINED in the value. Return the row. This is
  // what is missing.
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
  try (SequenceFileAsBinaryInputFormat.SequenceFileAsBinaryRecordReader rr =
      new SequenceFileAsBinaryInputFormat.SequenceFileAsBinaryRecordReader()) {
    InputSplit is =
        new FileSplit(keyFileStatus.getPath(), 0, keyFileStatus.getLen(), new String[] {});
    rr.initialize(is, context);
    while (rr.nextKeyValue()) {
      rr.getCurrentKey();
      BytesWritable bw = rr.getCurrentValue();
      if (Verify.VerifyReducer.whichType(bw.getBytes()) == Verify.Counts.UNDEFINED) {
        byte[] key = new byte[rr.getCurrentKey().getLength()];
        System.arraycopy(rr.getCurrentKey().getBytes(), 0, key, 0, rr.getCurrentKey().getLength());
        result.add(key);
      }
    }
  }
  return result;
}
public ArrayList<String> readRecords(URL testFileUrl, int splitSize) throws IOException {
  // Set up context
  File testFile = new File(testFileUrl.getFile());
  long testFileSize = testFile.length();
  Path testFilePath = new Path(testFile.getAbsolutePath());
  Configuration conf = new Configuration();
  conf.setInt("io.file.buffer.size", 1);
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());

  // Gather the records returned by the record reader
  ArrayList<String> records = new ArrayList<String>();

  long offset = 0;
  while (offset < testFileSize) {
    FileSplit split = new FileSplit(testFilePath, offset, splitSize, null);
    LineRecordReader reader = new LineRecordReader();
    reader.initialize(split, context);

    while (reader.nextKeyValue()) {
      records.add(reader.getCurrentValue().toString());
    }
    offset += splitSize;
  }
  return records;
}
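A hypothetical call of the helper above (the resource name and split size are invented): the returned list should contain every line exactly once regardless of where the split boundaries fall.

// Hypothetical usage; "records.txt" is a made-up test resource.
URL testFileUrl = getClass().getClassLoader().getResource("records.txt");
ArrayList<String> lines = readRecords(testFileUrl, 17);   // deliberately awkward split size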
@Test
public void testMaxBlockLocationsNewSplits() throws Exception {
  TEST_DIR.mkdirs();
  try {
    Configuration conf = new Configuration();
    conf.setInt(MRConfig.MAX_BLOCK_LOCATIONS_KEY, 4);
    Path submitDir = new Path(TEST_DIR.getAbsolutePath());
    FileSystem fs = FileSystem.getLocal(conf);
    FileSplit split = new FileSplit(new Path("/some/path"), 0, 1,
        new String[] { "loc1", "loc2", "loc3", "loc4", "loc5" });
    JobSplitWriter.createSplitFiles(submitDir, conf, fs,
        new FileSplit[] { split });
    JobSplit.TaskSplitMetaInfo[] infos =
        SplitMetaInfoReader.readSplitMetaInfo(new JobID(), fs, conf, submitDir);
    assertEquals("unexpected number of splits", 1, infos.length);
    assertEquals("unexpected number of split locations",
        4, infos[0].getLocations().length);
  } finally {
    FileUtil.fullyDelete(TEST_DIR);
  }
}
FileSplit split = new FileSplit(testFilePath, 0, testFileSize, (String[])null);
LineRecordReader reader = new LineRecordReader();
/**
 * A factory that makes the split for this class. It can be overridden
 * by sub-classes to make sub-types.
 */
protected FileSplit makeSplit(Path file, long start, long length, String[] hosts,
                              String[] inMemoryHosts) {
  return new FileSplit(file, start, length, hosts, inMemoryHosts);
}
/**
 * A factory that makes the split for this class. It can be overridden
 * by sub-classes to make sub-types.
 */
protected FileSplit makeSplit(Path file, long start, long length, String[] hosts) {
  return new FileSplit(file, start, length, hosts);
}
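A hypothetical illustration of why this factory exists: a FileInputFormat subclass can return its own FileSplit subtype by overriding makeSplit() instead of re-implementing getSplits(). MyInputFormat and MyFileSplit are invented names, not real Hadoop classes.

// Hypothetical sketch under the assumption above.
public class MyInputFormat extends TextInputFormat {

  /** Trivial FileSplit subtype; a real one would add fields and override write()/readFields(). */
  public static class MyFileSplit extends FileSplit {
    public MyFileSplit() { }                                  // no-arg constructor for Writable deserialization

    public MyFileSplit(Path file, long start, long length, String[] hosts) {
      super(file, start, length, hosts);
    }
  }

  @Override
  protected FileSplit makeSplit(Path file, long start, long length, String[] hosts) {
    return new MyFileSplit(file, start, length, hosts);
  }
}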
@Test
public void testMultipleClose() throws IOException {
  URL testFileUrl = getClass().getClassLoader().
      getResource("recordSpanningMultipleSplits.txt.bz2");
  assertNotNull("Cannot find recordSpanningMultipleSplits.txt.bz2",
      testFileUrl);
  File testFile = new File(testFileUrl.getFile());
  Path testFilePath = new Path(testFile.getAbsolutePath());
  long testFileSize = testFile.length();
  Configuration conf = new Configuration();
  conf.setInt(org.apache.hadoop.mapreduce.lib.input.
      LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());

  // read the data and check whether BOM is skipped
  FileSplit split = new FileSplit(testFilePath, 0, testFileSize, null);
  LineRecordReader reader = new LineRecordReader();
  reader.initialize(split, context);

  //noinspection StatementWithEmptyBody
  while (reader.nextKeyValue()) ;
  reader.close();
  reader.close();

  BZip2Codec codec = new BZip2Codec();
  codec.setConf(conf);
  Set<Decompressor> decompressors = new HashSet<Decompressor>();
  for (int i = 0; i < 10; ++i) {
    decompressors.add(CodecPool.getDecompressor(codec));
  }
  assertEquals(10, decompressors.size());
}
FileSplit split = new FileSplit(testFilePath, 0, testFileSize, (String[])null);
LineRecordReader reader = new LineRecordReader(recordDelimiterBytes);

split = new FileSplit(testFilePath, 0, firstSplitLength, (String[])null);
reader = new LineRecordReader(recordDelimiterBytes);
reader.initialize(split, context);

split = new FileSplit(testFilePath, firstSplitLength,
    testFileSize - firstSplitLength, (String[])null);
reader = new LineRecordReader(recordDelimiterBytes);
conf.setInt(org.apache.hadoop.mapreduce.lib.input.
    LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
FileSplit split = new FileSplit(inputFile, 0, 15, (String[])null);
TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());

split = new FileSplit(inputFile, 15, 4, (String[])null);
reader = new LineRecordReader(null);
reader.initialize(split, context);

split = new FileSplit(inputFile, 0, 12, (String[])null);
reader = new LineRecordReader(null);
reader.initialize(split, context);
byte[] recordDelimiterBytes = delimiter.getBytes(StandardCharsets.UTF_8);
int splitLength = 15;
FileSplit split = new FileSplit(inputFile, 0, splitLength, (String[])null);
TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());

assertNull("Unexpected key returned", key);
reader.close();

split = new FileSplit(inputFile, splitLength,
    inputData.length() - splitLength, (String[])null);
reader = new LineRecordReader(recordDelimiterBytes);

inputFile = createInputFile(conf, inputData);
splitLength = 5;
split = new FileSplit(inputFile, 0, splitLength, (String[])null);
reader = new LineRecordReader(recordDelimiterBytes);
reader.initialize(split, context);
reader.close();

split = new FileSplit(inputFile, splitLength,
    inputData.length() - splitLength, (String[])null);
reader = new LineRecordReader(recordDelimiterBytes);

split = new FileSplit(inputFile, 0, bufferSize, (String[])null);
reader = new LineRecordReader(recordDelimiterBytes);
reader.initialize(split, context);
private void openForRead(TaskAttemptContext taskAttemptContext)
    throws IOException, InterruptedException {
  reader = new SequenceFileRecordReader<K, V>();
  reader.initialize(new FileSplit(chunkFilePath, 0,
      DistCpUtils.getFileSize(chunkFilePath, chunkContext.getConfiguration()), null),
      taskAttemptContext);
}
private CarbonInputSplit convertToCarbonInputSplit(ExtendedBlocklet blocklet) throws IOException {
  CarbonInputSplit split = CarbonInputSplit
      .from(blocklet.getSegmentId(), blocklet.getBlockletId(),
          new FileSplit(new Path(blocklet.getPath()), 0, blocklet.getLength(),
              blocklet.getLocations()),
          ColumnarFormatVersion.valueOf((short) blocklet.getDetailInfo().getVersionNumber()),
          blocklet.getDataMapWriterPath());
  split.setDetailInfo(blocklet.getDetailInfo());
  return split;
}