@Override public List<InputSplit> getSplits(JobContext ctx) throws IOException, InterruptedException {
    List<InputSplit> res = new ArrayList<>(BLOCK_CNT);

    for (int i = 0; i < BLOCK_CNT; i++)
        try {
            res.add(new FileSplit(new Path(new URI("someFile")), i, i + 1, new String[] {"localhost"}));
        }
        catch (URISyntaxException e) {
            throw new IOException(e);
        }

    return res;
}
/**
 * Returns a split for each store files directory using the block location
 * of each file as locality reference.
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
  List<InputSplit> splits = new ArrayList<>();
  List<FileStatus> files = listStatus(job);

  Text key = new Text();
  for (FileStatus file: files) {
    Path path = file.getPath();
    FileSystem fs = path.getFileSystem(job.getConfiguration());
    LineReader reader = new LineReader(fs.open(path));
    long pos = 0;
    int n;
    try {
      while ((n = reader.readLine(key)) > 0) {
        String[] hosts = getStoreDirHosts(fs, path);
        splits.add(new FileSplit(path, pos, n, hosts));
        pos += n;
      }
    } finally {
      reader.close();
    }
  }
  return splits;
}
/**
 * @param clsName Input split class name.
 * @param in Input stream.
 * @param hosts Optional hosts.
 * @return File block or {@code null} if it is not a {@link FileSplit} instance.
 * @throws IgniteCheckedException If failed.
 */
public static HadoopFileBlock readFileBlock(String clsName, DataInput in, @Nullable String[] hosts)
    throws IgniteCheckedException {
    if (!FileSplit.class.getName().equals(clsName))
        return null;

    FileSplit split = new FileSplit();

    try {
        split.readFields(in);
    }
    catch (IOException e) {
        throw new IgniteCheckedException(e);
    }

    if (hosts == null)
        hosts = EMPTY_HOSTS;

    return new HadoopFileBlock(hosts, split.getPath().toUri(), split.getStart(), split.getLength());
}
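A minimal sketch of how readFileBlock() above might be driven, assuming the surrounding Ignite types (HadoopFileBlock, IgniteCheckedException) are on the classpath; the path, length, and host name are made up:

// Hypothetical round trip: write a FileSplit through its Writable contract,
// then rebuild it as a HadoopFileBlock. Exceptions propagate to the caller.
FileSplit src = new FileSplit(new Path("/some/file"), 0, 1024, new String[] {"host1"});

ByteArrayOutputStream bos = new ByteArrayOutputStream();
src.write(new DataOutputStream(bos));     // FileSplit serializes path, start and length; hosts are not written

DataInput in = new DataInputStream(new ByteArrayInputStream(bos.toByteArray()));

HadoopFileBlock block = readFileBlock(FileSplit.class.getName(), in, src.getLocations());
// block.file(), block.start() and block.length() now mirror the original split.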
/** {@inheritDoc} */
@Override public InputSplit getInputSplit() {
    if (inputSplit == null) {
        HadoopInputSplit split = ctx.taskInfo().inputSplit();

        if (split == null)
            return null;

        if (split instanceof HadoopFileBlock) {
            HadoopFileBlock fileBlock = (HadoopFileBlock)split;

            inputSplit = new FileSplit(new Path(fileBlock.file()), fileBlock.start(), fileBlock.length(), null);
        }
        else {
            try {
                inputSplit = (InputSplit)((HadoopV2TaskContext)ctx).getNativeSplit(split);
            }
            catch (IgniteCheckedException e) {
                throw new IllegalStateException(e);
            }
        }
    }

    return inputSplit;
}
@Override
public void initialize(InputSplit unusedSplit, TaskAttemptContext cx)
    throws IOException, InterruptedException {
  super.initialize(
      new FileSplit(this.split.getPath(this.idx),
                    this.split.getOffset(this.idx),
                    this.split.getLength(this.idx),
                    null),
      cx);
}
/**
 * Tests serialization of wrapper and the wrapped native split.
 *
 * @throws Exception If fails.
 */
@Test
public void testSerialization() throws Exception {
    FileSplit nativeSplit = new FileSplit(new Path("/path/to/file"), 100, 500, new String[] {"host1", "host2"});

    assertEquals("/path/to/file:100+500", nativeSplit.toString());

    HadoopSplitWrapper split = HadoopUtils.wrapSplit(10, nativeSplit, nativeSplit.getLocations());

    assertEquals("[host1, host2]", Arrays.toString(split.hosts()));

    ByteArrayOutputStream buf = new ByteArrayOutputStream();

    ObjectOutput out = new ObjectOutputStream(buf);

    out.writeObject(split);

    ObjectInput in = new ObjectInputStream(new ByteArrayInputStream(buf.toByteArray()));

    final HadoopSplitWrapper res = (HadoopSplitWrapper)in.readObject();

    assertEquals("/path/to/file:100+500", HadoopUtils.unwrapSplit(res).toString());

    GridTestUtils.assertThrows(log, new Callable<Object>() {
        @Override public Object call() throws Exception {
            res.hosts();

            return null;
        }
    }, AssertionError.class, null);
}
private static SortedSet<byte[]> readFileToSearch(final Configuration conf, final FileSystem fs,
    final LocatedFileStatus keyFileStatus) throws IOException, InterruptedException {
  SortedSet<byte[]> result = new TreeSet<>(Bytes.BYTES_COMPARATOR);
  // Return entries that are flagged Counts.UNDEFINED in the value. Return the row. This is
  // what is missing.
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
  try (SequenceFileAsBinaryInputFormat.SequenceFileAsBinaryRecordReader rr =
      new SequenceFileAsBinaryInputFormat.SequenceFileAsBinaryRecordReader()) {
    InputSplit is =
        new FileSplit(keyFileStatus.getPath(), 0, keyFileStatus.getLen(), new String[] {});
    rr.initialize(is, context);
    while (rr.nextKeyValue()) {
      rr.getCurrentKey();
      BytesWritable bw = rr.getCurrentValue();
      if (Verify.VerifyReducer.whichType(bw.getBytes()) == Verify.Counts.UNDEFINED) {
        byte[] key = new byte[rr.getCurrentKey().getLength()];
        System.arraycopy(rr.getCurrentKey().getBytes(), 0, key, 0, rr.getCurrentKey().getLength());
        result.add(key);
      }
    }
  }
  return result;
}
public ArrayList<String> readRecords(URL testFileUrl, int splitSize) throws IOException {
  // Set up context
  File testFile = new File(testFileUrl.getFile());
  long testFileSize = testFile.length();
  Path testFilePath = new Path(testFile.getAbsolutePath());
  Configuration conf = new Configuration();
  conf.setInt("io.file.buffer.size", 1);
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());

  // Gather the records returned by the record reader
  ArrayList<String> records = new ArrayList<String>();

  long offset = 0;
  while (offset < testFileSize) {
    FileSplit split = new FileSplit(testFilePath, offset, splitSize, null);
    LineRecordReader reader = new LineRecordReader();
    reader.initialize(split, context);

    while (reader.nextKeyValue()) {
      records.add(reader.getCurrentValue().toString());
    }
    offset += splitSize;
  }
  return records;
}
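A hypothetical call of the helper above (the resource name and split size are invented): the returned list should contain every line exactly once regardless of where the split boundaries fall.

// Hypothetical usage; "records.txt" is a made-up test resource.
URL testFileUrl = getClass().getClassLoader().getResource("records.txt");
ArrayList<String> lines = readRecords(testFileUrl, 17);   // deliberately awkward split size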
@Test
public void testMaxBlockLocationsNewSplits() throws Exception {
  TEST_DIR.mkdirs();
  try {
    Configuration conf = new Configuration();
    conf.setInt(MRConfig.MAX_BLOCK_LOCATIONS_KEY, 4);
    Path submitDir = new Path(TEST_DIR.getAbsolutePath());
    FileSystem fs = FileSystem.getLocal(conf);
    FileSplit split = new FileSplit(new Path("/some/path"), 0, 1,
        new String[] { "loc1", "loc2", "loc3", "loc4", "loc5" });
    JobSplitWriter.createSplitFiles(submitDir, conf, fs,
        new FileSplit[] { split });
    JobSplit.TaskSplitMetaInfo[] infos =
        SplitMetaInfoReader.readSplitMetaInfo(new JobID(), fs, conf, submitDir);
    assertEquals("unexpected number of splits", 1, infos.length);
    assertEquals("unexpected number of split locations",
        4, infos[0].getLocations().length);
  } finally {
    FileUtil.fullyDelete(TEST_DIR);
  }
}
FileSplit split = new FileSplit(testFilePath, 0, testFileSize, (String[])null);
LineRecordReader reader = new LineRecordReader();
/**
 * A factory that makes the split for this class. It can be overridden
 * by sub-classes to make sub-types.
 */
protected FileSplit makeSplit(Path file, long start, long length, String[] hosts,
                              String[] inMemoryHosts) {
  return new FileSplit(file, start, length, hosts, inMemoryHosts);
}
/**
 * A factory that makes the split for this class. It can be overridden
 * by sub-classes to make sub-types.
 */
protected FileSplit makeSplit(Path file, long start, long length, String[] hosts) {
  return new FileSplit(file, start, length, hosts);
}
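A hypothetical illustration of why this factory exists: a FileInputFormat subclass can return its own FileSplit subtype by overriding makeSplit() instead of re-implementing getSplits(). MyInputFormat and MyFileSplit are invented names, not real Hadoop classes.

// Hypothetical sketch under the assumption above.
public class MyInputFormat extends TextInputFormat {

  /** Trivial FileSplit subtype; a real one would add fields and override write()/readFields(). */
  public static class MyFileSplit extends FileSplit {
    public MyFileSplit() { }                                  // no-arg constructor for Writable deserialization

    public MyFileSplit(Path file, long start, long length, String[] hosts) {
      super(file, start, length, hosts);
    }
  }

  @Override
  protected FileSplit makeSplit(Path file, long start, long length, String[] hosts) {
    return new MyFileSplit(file, start, length, hosts);
  }
}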
@Test
public void testMultipleClose() throws IOException {
  URL testFileUrl = getClass().getClassLoader().
      getResource("recordSpanningMultipleSplits.txt.bz2");
  assertNotNull("Cannot find recordSpanningMultipleSplits.txt.bz2",
      testFileUrl);
  File testFile = new File(testFileUrl.getFile());
  Path testFilePath = new Path(testFile.getAbsolutePath());
  long testFileSize = testFile.length();
  Configuration conf = new Configuration();
  conf.setInt(org.apache.hadoop.mapreduce.lib.input.
      LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());

  // read the data and check whether BOM is skipped
  FileSplit split = new FileSplit(testFilePath, 0, testFileSize, null);
  LineRecordReader reader = new LineRecordReader();
  reader.initialize(split, context);

  //noinspection StatementWithEmptyBody
  while (reader.nextKeyValue()) ;
  reader.close();
  reader.close();

  BZip2Codec codec = new BZip2Codec();
  codec.setConf(conf);
  Set<Decompressor> decompressors = new HashSet<Decompressor>();
  for (int i = 0; i < 10; ++i) {
    decompressors.add(CodecPool.getDecompressor(codec));
  }
  assertEquals(10, decompressors.size());
}
FileSplit split = new FileSplit(testFilePath, 0, testFileSize, (String[])null);
LineRecordReader reader = new LineRecordReader(recordDelimiterBytes);

split = new FileSplit(testFilePath, 0, firstSplitLength, (String[])null);
reader = new LineRecordReader(recordDelimiterBytes);
reader.initialize(split, context);

split = new FileSplit(testFilePath, firstSplitLength,
    testFileSize - firstSplitLength, (String[])null);
reader = new LineRecordReader(recordDelimiterBytes);
conf.setInt(org.apache.hadoop.mapreduce.lib.input.
    LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
FileSplit split = new FileSplit(inputFile, 0, 15, (String[])null);
TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());

split = new FileSplit(inputFile, 15, 4, (String[])null);
reader = new LineRecordReader(null);
reader.initialize(split, context);

split = new FileSplit(inputFile, 0, 12, (String[])null);
reader = new LineRecordReader(null);
reader.initialize(split, context);
byte[] recordDelimiterBytes = delimiter.getBytes(StandardCharsets.UTF_8);
int splitLength = 15;
FileSplit split = new FileSplit(inputFile, 0, splitLength, (String[])null);
TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());

assertNull("Unexpected key returned", key);
reader.close();

split = new FileSplit(inputFile, splitLength,
    inputData.length() - splitLength, (String[])null);
reader = new LineRecordReader(recordDelimiterBytes);

inputFile = createInputFile(conf, inputData);
splitLength = 5;
split = new FileSplit(inputFile, 0, splitLength, (String[])null);
reader = new LineRecordReader(recordDelimiterBytes);
reader.initialize(split, context);
reader.close();

split = new FileSplit(inputFile, splitLength,
    inputData.length() - splitLength, (String[])null);
reader = new LineRecordReader(recordDelimiterBytes);

split = new FileSplit(inputFile, 0, bufferSize, (String[])null);
reader = new LineRecordReader(recordDelimiterBytes);
reader.initialize(split, context);
private void openForRead(TaskAttemptContext taskAttemptContext)
    throws IOException, InterruptedException {
  reader = new SequenceFileRecordReader<K, V>();
  reader.initialize(new FileSplit(chunkFilePath, 0,
      DistCpUtils.getFileSize(chunkFilePath, chunkContext.getConfiguration()), null),
      taskAttemptContext);
}
private CarbonInputSplit convertToCarbonInputSplit(ExtendedBlocklet blocklet) throws IOException {
  CarbonInputSplit split = CarbonInputSplit
      .from(blocklet.getSegmentId(), blocklet.getBlockletId(),
          new FileSplit(new Path(blocklet.getPath()), 0, blocklet.getLength(),
              blocklet.getLocations()),
          ColumnarFormatVersion.valueOf((short) blocklet.getDetailInfo().getVersionNumber()),
          blocklet.getDataMapWriterPath());
  split.setDetailInfo(blocklet.getDetailInfo());
  return split;
}