public SymlinkTextInputSplit(Path symlinkPath, FileSplit split) throws IOException {
  super(symlinkPath, 0, 0, split.getLocations());
  this.split = split;
}
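A minimal usage sketch, with invented paths and host names: the resulting split is keyed by the symlink's path, while the wrapped split describes the actual target file.

FileSplit target = new FileSplit(new Path("/warehouse/data/file0.txt"), 0L, 2048L,
    new String[] {"node1"});  // hypothetical target file and host
SymlinkTextInputSplit symlinkSplit =
    new SymlinkTextInputSplit(new Path("/warehouse/links/link0.txt"), target);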
/**
 * Returns a string representation of a Hadoop {@link FileSplit}.
 *
 * @param fs Hadoop {@link FileSplit}
 * @return its string representation
 */
public static String toStringHadoopFileSplit(FileSplit fs) {
  StringBuilder sb = new StringBuilder();
  sb.append("HadoopFileSplit: Path: ").append(fs.getPath());
  sb.append(" , Start: ").append(fs.getStart());
  sb.append(" , Length: ").append(fs.getLength());
  sb.append(" , Hosts: ");
  String[] locs;
  try {
    locs = fs.getLocations();
  } catch (IOException e) {
    LOG.error(e.getMessage());
    locs = new String[] {};
  }
  for (String loc : locs) {
    sb.append(loc).append("; ");
  }
  return sb.toString();
}
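For illustration, calling the helper on a hand-built split (all values invented) produces output like the trailing comment below:

FileSplit fs = new FileSplit(new Path("/data/input.txt"), 0L, 1024L,
    new String[] {"host1", "host2"});
System.out.println(toStringHadoopFileSplit(fs));
// HadoopFileSplit: Path: /data/input.txt , Start: 0 , Length: 1024 , Hosts: host1; host2;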
private FileSplit createMockFileSplit(boolean createOrcSplit, String fakePathString, long start,
    long length, String[] locations) throws IOException {
  FileSplit fileSplit;
  if (createOrcSplit) {
    fileSplit = mock(OrcSplit.class);
  } else {
    fileSplit = mock(FileSplit.class);
  }
  doReturn(start).when(fileSplit).getStart();
  doReturn(length).when(fileSplit).getLength();
  doReturn(new Path(fakePathString)).when(fileSplit).getPath();
  doReturn(locations).when(fileSplit).getLocations();
  return fileSplit;
}
/**
 * @param jobConf Job configuration.
 * @return Collection of mapped splits.
 * @throws IgniteCheckedException If mapping failed.
 */
public static Collection<HadoopInputSplit> splitJob(JobConf jobConf) throws IgniteCheckedException {
  try {
    InputFormat<?, ?> format = jobConf.getInputFormat();

    assert format != null;

    InputSplit[] splits = format.getSplits(jobConf, 0);

    Collection<HadoopInputSplit> res = new ArrayList<>(splits.length);

    for (int i = 0; i < splits.length; i++) {
      InputSplit nativeSplit = splits[i];

      if (nativeSplit instanceof FileSplit) {
        FileSplit s = (FileSplit) nativeSplit;

        res.add(new HadoopFileBlock(s.getLocations(), s.getPath().toUri(), s.getStart(), s.getLength()));
      } else {
        res.add(HadoopUtils.wrapSplit(i, nativeSplit, nativeSplit.getLocations()));
      }
    }

    return res;
  } catch (IOException e) {
    throw new IgniteCheckedException(e);
  }
}
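A hedged usage sketch; the input path is an assumption and checked-exception handling is elided:

JobConf jobConf = new JobConf();
jobConf.setInputFormat(TextInputFormat.class);               // any mapred InputFormat
FileInputFormat.setInputPaths(jobConf, new Path("/input"));  // hypothetical path
Collection<HadoopInputSplit> blocks = splitJob(jobConf);     // throws IgniteCheckedException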
@Test
public void testSplitLocationInfo() throws Exception {
  Configuration conf = getConfiguration();
  conf.set(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.INPUT_DIR, "test:///a1/a2");
  JobConf job = new JobConf(conf);
  TextInputFormat fileInputFormat = new TextInputFormat();
  fileInputFormat.configure(job);
  FileSplit[] splits = (FileSplit[]) fileInputFormat.getSplits(job, 1);
  String[] locations = splits[0].getLocations();
  Assert.assertEquals(2, locations.length);
  SplitLocationInfo[] locationInfo = splits[0].getLocationInfo();
  Assert.assertEquals(2, locationInfo.length);
  SplitLocationInfo localhostInfo =
      locations[0].equals("localhost") ? locationInfo[0] : locationInfo[1];
  SplitLocationInfo otherhostInfo =
      locations[0].equals("otherhost") ? locationInfo[0] : locationInfo[1];
  Assert.assertTrue(localhostInfo.isOnDisk());
  Assert.assertTrue(localhostInfo.isInMemory());
  Assert.assertTrue(otherhostInfo.isOnDisk());
  Assert.assertFalse(otherhostInfo.isInMemory());
}
public HoodieRealtimeFileSplit(FileSplit baseSplit, String basePath, List<String> deltaLogFiles,
    String maxCommitTime) throws IOException {
  super(baseSplit.getPath(), baseSplit.getStart(), baseSplit.getLength(), baseSplit.getLocations());
  this.deltaFilePaths = deltaLogFiles;
  this.maxCommitTime = maxCommitTime;
  this.basePath = basePath;
}
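A construction sketch, assuming a Hudi merge-on-read layout; the file names and commit time below are invented for illustration:

FileSplit base = new FileSplit(new Path("/hudi/tbl/f1_0.parquet"), 0L, 4096L,
    new String[] {"node1"});  // hypothetical base parquet file
List<String> logs = Arrays.asList("/hudi/tbl/.f1_0.log.1");  // hypothetical delta log
HoodieRealtimeFileSplit rtSplit =
    new HoodieRealtimeFileSplit(base, "/hudi/tbl", logs, "20200101000000");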
/**
 * Combines two file splits into a CombineFileSplit.
 *
 * @param conf the job configuration
 * @param split1 the first split to combine
 * @param split2 the second split to combine
 * @return a CombineFileSplit covering both input splits
 * @throws IOException if the locations of either split cannot be retrieved
 */
public static InputSplit combineFileSplits(JobConf conf, FileSplit split1, FileSplit split2)
    throws IOException {
  Path[] paths = new Path[2];
  long[] starts = new long[2];
  long[] lengths = new long[2];
  Vector<String> vlocations = new Vector<String>();

  paths[0] = split1.getPath();
  starts[0] = split1.getStart();
  lengths[0] = split1.getLength();
  vlocations.addAll(Arrays.asList(split1.getLocations()));

  paths[1] = split2.getPath();
  starts[1] = split2.getStart();
  lengths[1] = split2.getLength();
  vlocations.addAll(Arrays.asList(split2.getLocations()));

  String[] locations = prioritizeLocations(vlocations);
  return new CombineFileSplit(conf, paths, starts, lengths, locations);
}
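For example (hypothetical file and hosts), two adjacent 64 MB splits of the same file merge into a single CombineFileSplit whose locations are reordered by prioritizeLocations:

JobConf conf = new JobConf();
long mb64 = 64L * 1024 * 1024;
FileSplit s1 = new FileSplit(new Path("/data/part-00000"), 0L, mb64,
    new String[] {"node1", "node2"});
FileSplit s2 = new FileSplit(new Path("/data/part-00000"), mb64, mb64,
    new String[] {"node2", "node3"});
InputSplit combined = combineFileSplits(conf, s1, s2);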
public PailInputSplit(FileSystem fs, String root, PailSpec spec, JobConf job, FileSplit split)
    throws IOException {
  super(split.getPath(), split.getStart(), split.getLength(), (String[]) null);
  _spec = spec;
  _hosts = split.getLocations();
  setRelPath(fs, root);
}
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
  JobConf jobConf = (JobConf) HadoopCompat.getConfiguration(context);
  initInputFormat(jobConf);
  org.apache.hadoop.mapred.InputSplit[] splits =
      realInputFormat.getSplits(jobConf, jobConf.getNumMapTasks());
  if (splits == null) {
    return null;
  }
  List<InputSplit> resultSplits = new ArrayList<InputSplit>(splits.length);
  for (org.apache.hadoop.mapred.InputSplit split : splits) {
    if (split.getClass() == org.apache.hadoop.mapred.FileSplit.class) {
      org.apache.hadoop.mapred.FileSplit mapredFileSplit =
          (org.apache.hadoop.mapred.FileSplit) split;
      resultSplits.add(new FileSplit(mapredFileSplit.getPath(), mapredFileSplit.getStart(),
          mapredFileSplit.getLength(), mapredFileSplit.getLocations()));
    } else {
      resultSplits.add(new InputSplitWrapper(split));
    }
  }
  return resultSplits;
}
/**
 * Initialize the RecordReader with an InputSplit and a Configuration.
 * Note that this method is not called from within Hadoop as in
 * {@link com.mongodb.hadoop.input.BSONFileRecordReader}; it exists for
 * consistency.
 *
 * @param inputSplit the FileSplit over which to iterate BSONWritables.
 * @param conf the job's Configuration.
 * @throws IOException when there is an error opening the file
 */
public void initialize(final InputSplit inputSplit, final Configuration conf) throws IOException {
  FileSplit fileSplit = (FileSplit) inputSplit;
  try {
    delegate.init(
        new BSONFileSplit(fileSplit.getPath(), fileSplit.getStart(),
            fileSplit.getLength(), fileSplit.getLocations()),
        conf);
  } catch (InterruptedException e) {
    throw new RuntimeException(e);
  }
}
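An illustrative call, assuming reader is an instance of the enclosing record-reader class; the BSON dump path and length are invented:

FileSplit split = new FileSplit(new Path("/dumps/users.bson"), 0L, 1000000L,
    new String[0]);  // hypothetical local BSON dump
reader.initialize(split, new Configuration());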
/**
 * Builds a {@code ParquetInputSplit} from a mapred
 * {@link org.apache.hadoop.mapred.FileSplit}.
 *
 * @param split a mapred FileSplit
 * @return a ParquetInputSplit
 * @throws IOException if there is an error while creating the Parquet split
 */
static ParquetInputSplit from(org.apache.hadoop.mapred.FileSplit split) throws IOException {
  return new ParquetInputSplit(split.getPath(),
      split.getStart(), split.getStart() + split.getLength(), split.getLength(),
      split.getLocations(), null);
}
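A usage sketch from within the same package (the parquet path and host are invented); note the third constructor argument is the split's end offset, start + length, not its length:

org.apache.hadoop.mapred.FileSplit mapredSplit = new org.apache.hadoop.mapred.FileSplit(
    new Path("/data/file.parquet"), 0L, 512L, new String[] {"node1"});
ParquetInputSplit parquetSplit = from(mapredSplit);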
@Override
public RecordReader<NullWritable, BSONWritable> getRecordReader(final InputSplit split,
    final JobConf job, final Reporter reporter) throws IOException {
  FileSplit fileSplit = (FileSplit) split;
  FileSystem fs = FileSystem.get(fileSplit.getPath().toUri(), job);
  if (split instanceof BSONFileSplit || !isSplitable(fs, fileSplit.getPath())) {
    BSONFileRecordReader reader = new BSONFileRecordReader();
    reader.initialize(split, job);
    return reader;
  }
  // Split was not created by BSONSplitter.
  BSONSplitter splitter = new BSONSplitter();
  splitter.setConf(job);
  splitter.setInputPath(fileSplit.getPath());
  org.apache.hadoop.mapreduce.lib.input.FileSplit newStyleFileSplit =
      new org.apache.hadoop.mapreduce.lib.input.FileSplit(
          fileSplit.getPath(), fileSplit.getStart(), fileSplit.getLength(),
          fileSplit.getLocations());
  long start = splitter.getStartingPositionForSplit(newStyleFileSplit);
  BSONFileRecordReader reader = new BSONFileRecordReader(start);
  reader.initialize(fileSplit, job);
  return reader;
}