public SymlinkTextInputSplit(Path symlinkPath, FileSplit split) throws IOException {
  super(symlinkPath, 0, 0, split.getLocations());
  this.split = split;
}
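A minimal usage sketch, with invented paths and host names: the resulting split is keyed by the symlink's path, while the wrapped split describes the actual target file.

FileSplit target = new FileSplit(new Path("/warehouse/data/file0.txt"), 0L, 2048L,
    new String[] {"node1"});  // hypothetical target file and host
SymlinkTextInputSplit symlinkSplit =
    new SymlinkTextInputSplit(new Path("/warehouse/links/link0.txt"), target);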
/**
 * Returns a string representation of a Hadoop {@link FileSplit}.
 *
 * @param fs Hadoop {@link FileSplit}
 * @return its string representation
 */
public static String toStringHadoopFileSplit(FileSplit fs) {
  StringBuilder sb = new StringBuilder();
  sb.append("HadoopFileSplit: Path: ").append(fs.getPath());
  sb.append(" , Start: ").append(fs.getStart());
  sb.append(" , Length: ").append(fs.getLength());
  sb.append(" , Hosts: ");
  String[] locs;
  try {
    locs = fs.getLocations();
  } catch (IOException e) {
    LOG.error(e.getMessage());
    locs = new String[] {};
  }
  for (String loc : locs) {
    sb.append(loc).append("; ");
  }
  return sb.toString();
}
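For illustration, calling the helper on a hand-built split (all values invented) produces output like the trailing comment below:

FileSplit fs = new FileSplit(new Path("/data/input.txt"), 0L, 1024L,
    new String[] {"host1", "host2"});
System.out.println(toStringHadoopFileSplit(fs));
// HadoopFileSplit: Path: /data/input.txt , Start: 0 , Length: 1024 , Hosts: host1; host2;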
private FileSplit createMockFileSplit(boolean createOrcSplit, String fakePathString, long start,
    long length, String[] locations) throws IOException {
  FileSplit fileSplit;
  if (createOrcSplit) {
    fileSplit = mock(OrcSplit.class);
  } else {
    fileSplit = mock(FileSplit.class);
  }
  doReturn(start).when(fileSplit).getStart();
  doReturn(length).when(fileSplit).getLength();
  doReturn(new Path(fakePathString)).when(fileSplit).getPath();
  doReturn(locations).when(fileSplit).getLocations();
  return fileSplit;
}
/**
 * @param jobConf Job configuration.
 * @return Collection of mapped splits.
 * @throws IgniteCheckedException If mapping failed.
 */
public static Collection<HadoopInputSplit> splitJob(JobConf jobConf) throws IgniteCheckedException {
  try {
    InputFormat<?, ?> format = jobConf.getInputFormat();

    assert format != null;

    InputSplit[] splits = format.getSplits(jobConf, 0);

    Collection<HadoopInputSplit> res = new ArrayList<>(splits.length);

    for (int i = 0; i < splits.length; i++) {
      InputSplit nativeSplit = splits[i];

      if (nativeSplit instanceof FileSplit) {
        FileSplit s = (FileSplit) nativeSplit;

        res.add(new HadoopFileBlock(s.getLocations(), s.getPath().toUri(), s.getStart(), s.getLength()));
      } else {
        res.add(HadoopUtils.wrapSplit(i, nativeSplit, nativeSplit.getLocations()));
      }
    }

    return res;
  } catch (IOException e) {
    throw new IgniteCheckedException(e);
  }
}
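A hedged usage sketch; the input path is an assumption and checked-exception handling is elided:

JobConf jobConf = new JobConf();
jobConf.setInputFormat(TextInputFormat.class);               // any mapred InputFormat
FileInputFormat.setInputPaths(jobConf, new Path("/input"));  // hypothetical path
Collection<HadoopInputSplit> blocks = splitJob(jobConf);     // throws IgniteCheckedException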
@Test
public void testSplitLocationInfo() throws Exception {
  Configuration conf = getConfiguration();
  conf.set(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.INPUT_DIR, "test:///a1/a2");
  JobConf job = new JobConf(conf);
  TextInputFormat fileInputFormat = new TextInputFormat();
  fileInputFormat.configure(job);
  FileSplit[] splits = (FileSplit[]) fileInputFormat.getSplits(job, 1);
  String[] locations = splits[0].getLocations();
  Assert.assertEquals(2, locations.length);
  SplitLocationInfo[] locationInfo = splits[0].getLocationInfo();
  Assert.assertEquals(2, locationInfo.length);
  SplitLocationInfo localhostInfo =
      locations[0].equals("localhost") ? locationInfo[0] : locationInfo[1];
  SplitLocationInfo otherhostInfo =
      locations[0].equals("otherhost") ? locationInfo[0] : locationInfo[1];
  Assert.assertTrue(localhostInfo.isOnDisk());
  Assert.assertTrue(localhostInfo.isInMemory());
  Assert.assertTrue(otherhostInfo.isOnDisk());
  Assert.assertFalse(otherhostInfo.isInMemory());
}
public HoodieRealtimeFileSplit(FileSplit baseSplit, String basePath, List<String> deltaLogFiles,
    String maxCommitTime) throws IOException {
  super(baseSplit.getPath(), baseSplit.getStart(), baseSplit.getLength(), baseSplit.getLocations());
  this.deltaFilePaths = deltaLogFiles;
  this.maxCommitTime = maxCommitTime;
  this.basePath = basePath;
}
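A construction sketch, assuming a Hudi merge-on-read layout; the file names and commit time below are invented for illustration:

FileSplit base = new FileSplit(new Path("/hudi/tbl/f1_0.parquet"), 0L, 4096L,
    new String[] {"node1"});  // hypothetical base parquet file
List<String> logs = Arrays.asList("/hudi/tbl/.f1_0.log.1");  // hypothetical delta log
HoodieRealtimeFileSplit rtSplit =
    new HoodieRealtimeFileSplit(base, "/hudi/tbl", logs, "20200101000000");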
/**
 * Combines two file splits into a CombineFileSplit.
 *
 * @param conf the job configuration
 * @param split1 the first split to combine
 * @param split2 the second split to combine
 * @return a CombineFileSplit covering both input splits
 * @throws IOException if the locations of either split cannot be retrieved
 */
public static InputSplit combineFileSplits(JobConf conf, FileSplit split1, FileSplit split2)
    throws IOException {
  Path[] paths = new Path[2];
  long[] starts = new long[2];
  long[] lengths = new long[2];
  Vector<String> vlocations = new Vector<String>();

  paths[0] = split1.getPath();
  starts[0] = split1.getStart();
  lengths[0] = split1.getLength();
  vlocations.addAll(Arrays.asList(split1.getLocations()));

  paths[1] = split2.getPath();
  starts[1] = split2.getStart();
  lengths[1] = split2.getLength();
  vlocations.addAll(Arrays.asList(split2.getLocations()));

  String[] locations = prioritizeLocations(vlocations);
  return new CombineFileSplit(conf, paths, starts, lengths, locations);
}
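For example (hypothetical file and hosts), two adjacent 64 MB splits of the same file merge into a single CombineFileSplit whose locations are reordered by prioritizeLocations:

JobConf conf = new JobConf();
long mb64 = 64L * 1024 * 1024;
FileSplit s1 = new FileSplit(new Path("/data/part-00000"), 0L, mb64,
    new String[] {"node1", "node2"});
FileSplit s2 = new FileSplit(new Path("/data/part-00000"), mb64, mb64,
    new String[] {"node2", "node3"});
InputSplit combined = combineFileSplits(conf, s1, s2);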
public PailInputSplit(FileSystem fs, String root, PailSpec spec, JobConf job, FileSplit split)
    throws IOException {
  super(split.getPath(), split.getStart(), split.getLength(), (String[]) null);
  _spec = spec;
  _hosts = split.getLocations();
  setRelPath(fs, root);
}
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
  JobConf jobConf = (JobConf) HadoopCompat.getConfiguration(context);
  initInputFormat(jobConf);
  org.apache.hadoop.mapred.InputSplit[] splits =
      realInputFormat.getSplits(jobConf, jobConf.getNumMapTasks());
  if (splits == null) {
    return null;
  }
  List<InputSplit> resultSplits = new ArrayList<InputSplit>(splits.length);
  for (org.apache.hadoop.mapred.InputSplit split : splits) {
    if (split.getClass() == org.apache.hadoop.mapred.FileSplit.class) {
      org.apache.hadoop.mapred.FileSplit mapredFileSplit =
          (org.apache.hadoop.mapred.FileSplit) split;
      resultSplits.add(new FileSplit(mapredFileSplit.getPath(), mapredFileSplit.getStart(),
          mapredFileSplit.getLength(), mapredFileSplit.getLocations()));
    } else {
      resultSplits.add(new InputSplitWrapper(split));
    }
  }
  return resultSplits;
}
/**
 * Initialize the RecordReader with an InputSplit and a Configuration.
 * Note that this method is not called from within Hadoop as in
 * {@link com.mongodb.hadoop.input.BSONFileRecordReader}; it exists for
 * consistency.
 *
 * @param inputSplit the FileSplit over which to iterate BSONWritables.
 * @param conf the job's Configuration.
 * @throws IOException when there is an error opening the file
 */
public void initialize(final InputSplit inputSplit, final Configuration conf) throws IOException {
  FileSplit fileSplit = (FileSplit) inputSplit;
  try {
    delegate.init(
        new BSONFileSplit(fileSplit.getPath(), fileSplit.getStart(),
            fileSplit.getLength(), fileSplit.getLocations()),
        conf);
  } catch (InterruptedException e) {
    throw new RuntimeException(e);
  }
}
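An illustrative call, assuming reader is an instance of the enclosing record-reader class; the BSON dump path and length are invented:

FileSplit split = new FileSplit(new Path("/dumps/users.bson"), 0L, 1000000L,
    new String[0]);  // hypothetical local BSON dump
reader.initialize(split, new Configuration());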
/**
 * Builds a {@code ParquetInputSplit} from a mapred
 * {@link org.apache.hadoop.mapred.FileSplit}.
 *
 * @param split a mapred FileSplit
 * @return a ParquetInputSplit
 * @throws IOException if there is an error while creating the Parquet split
 */
static ParquetInputSplit from(org.apache.hadoop.mapred.FileSplit split) throws IOException {
  return new ParquetInputSplit(split.getPath(),
      split.getStart(), split.getStart() + split.getLength(), split.getLength(),
      split.getLocations(), null);
}
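A usage sketch from within the same package (the parquet path and host are invented); note the third constructor argument is the split's end offset, start + length, not its length:

org.apache.hadoop.mapred.FileSplit mapredSplit = new org.apache.hadoop.mapred.FileSplit(
    new Path("/data/file.parquet"), 0L, 512L, new String[] {"node1"});
ParquetInputSplit parquetSplit = from(mapredSplit);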
@Override
public RecordReader<NullWritable, BSONWritable> getRecordReader(final InputSplit split,
    final JobConf job, final Reporter reporter) throws IOException {
  FileSplit fileSplit = (FileSplit) split;
  FileSystem fs = FileSystem.get(fileSplit.getPath().toUri(), job);
  if (split instanceof BSONFileSplit || !isSplitable(fs, fileSplit.getPath())) {
    BSONFileRecordReader reader = new BSONFileRecordReader();
    reader.initialize(split, job);
    return reader;
  }
  // Split was not created by BSONSplitter.
  BSONSplitter splitter = new BSONSplitter();
  splitter.setConf(job);
  splitter.setInputPath(fileSplit.getPath());
  org.apache.hadoop.mapreduce.lib.input.FileSplit newStyleFileSplit =
      new org.apache.hadoop.mapreduce.lib.input.FileSplit(
          fileSplit.getPath(), fileSplit.getStart(), fileSplit.getLength(),
          fileSplit.getLocations());
  long start = splitter.getStartingPositionForSplit(newStyleFileSplit);
  BSONFileRecordReader reader = new BSONFileRecordReader(start);
  reader.initialize(fileSplit, job);
  return reader;
}