@Override
public void open(FileInputSplit split) throws IOException {
    super.open(split);
    dataFileReader = initReader(split);
    // Seek to the first Avro sync marker at or after the split start ...
    dataFileReader.sync(split.getStart());
    // ... and remember it so the current position can be checkpointed.
    lastSync = dataFileReader.previousSync();
}
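The sync-to-split-start pattern in open() only works together with a matching end-of-split check. A minimal sketch of that companion check, assuming the standard Avro DataFileReader API and the end offset computed in initReader below:

@Override
public boolean reachedEnd() throws IOException {
    // Done when no records remain, or once the reader has crossed the first
    // sync marker at or beyond the split's end offset; combined with
    // sync(split.getStart()) in open(), every Avro block is then consumed
    // by exactly one split.
    return !dataFileReader.hasNext() || dataFileReader.pastSync(end);
}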
/**
 * Creates the input splits to be forwarded to the downstream tasks of the
 * {@link ContinuousFileReaderOperator}. Splits are sorted <b>by modification time</b> before
 * being forwarded, and only splits belonging to files in the {@code eligibleFiles}
 * list will be processed.
 *
 * @param eligibleFiles The files to process.
 */
private Map<Long, List<TimestampedFileInputSplit>> getInputSplitsSortedByModTime(
        Map<Path, FileStatus> eligibleFiles) throws IOException {

    // TreeMap keeps the keys (modification timestamps) in ascending order.
    Map<Long, List<TimestampedFileInputSplit>> splitsByModTime = new TreeMap<>();
    if (eligibleFiles.isEmpty()) {
        return splitsByModTime;
    }

    for (FileInputSplit split : format.createInputSplits(readerParallelism)) {
        FileStatus fileStatus = eligibleFiles.get(split.getPath());
        if (fileStatus != null) {
            Long modTime = fileStatus.getModificationTime();
            List<TimestampedFileInputSplit> splitsToForward = splitsByModTime.get(modTime);
            if (splitsToForward == null) {
                splitsToForward = new ArrayList<>();
                splitsByModTime.put(modTime, splitsToForward);
            }
            splitsToForward.add(new TimestampedFileInputSplit(
                modTime, split.getSplitNumber(), split.getPath(),
                split.getStart(), split.getLength(), split.getHostnames()));
        }
    }
    return splitsByModTime;
}
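On Java 8+, the get-or-create step in the loop above can be collapsed with Map.computeIfAbsent; a behavior-equivalent sketch of the loop body:

for (FileInputSplit split : format.createInputSplits(readerParallelism)) {
    FileStatus fileStatus = eligibleFiles.get(split.getPath());
    if (fileStatus != null) {
        long modTime = fileStatus.getModificationTime();
        // one list per distinct modification timestamp, created on demand
        splitsByModTime
            .computeIfAbsent(modTime, k -> new ArrayList<>())
            .add(new TimestampedFileInputSplit(
                modTime, split.getSplitNumber(), split.getPath(),
                split.getStart(), split.getLength(), split.getHostnames()));
    }
}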
private DataFileReader<E> initReader(FileInputSplit split) throws IOException {
    DatumReader<E> datumReader;
    if (org.apache.avro.generic.GenericRecord.class == avroValueType) {
        datumReader = new GenericDatumReader<E>();
    } else {
        datumReader = org.apache.avro.specific.SpecificRecordBase.class.isAssignableFrom(avroValueType)
            ? new SpecificDatumReader<E>(avroValueType)
            : new ReflectDatumReader<E>(avroValueType);
    }
    if (LOG.isInfoEnabled()) {
        LOG.info("Opening split {}", split);
    }
    SeekableInput in = new FSDataInputStreamWrapper(stream,
        split.getPath().getFileSystem().getFileStatus(split.getPath()).getLen());
    DataFileReader<E> dataFileReader = (DataFileReader) DataFileReader.openReader(in, datumReader);
    if (LOG.isDebugEnabled()) {
        LOG.debug("Loaded SCHEMA: {}", dataFileReader.getSchema());
    }
    end = split.getStart() + split.getLength();
    recordsReadSinceLastSync = 0;
    return dataFileReader;
}
this.splitStart = fileSplit.getStart();
this.splitLength = fileSplit.getLength();
private Tuple2<Long, Long> getOffsetAndLengthForSplit(FileInputSplit split, List<StripeInformation> stripes) {
    long splitStart = split.getStart();
    long splitEnd = splitStart + split.getLength();
    long readStart = Long.MAX_VALUE;
    long readEnd = Long.MIN_VALUE;

    for (StripeInformation s : stripes) {
        if (splitStart <= s.getOffset() && s.getOffset() < splitEnd) {
            // stripe starts in this split, so it is read in full by this split
            readStart = Math.min(readStart, s.getOffset());
            readEnd = Math.max(readEnd, s.getOffset() + s.getLength());
        }
    }

    if (readStart < Long.MAX_VALUE) {
        // at least one stripe is included
        return Tuple2.of(readStart, readEnd - readStart);
    } else {
        // no stripe starts inside this split
        return Tuple2.of(0L, 0L);
    }
}
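A small self-contained illustration of the selection rule with invented offsets (the values and the long[] {offset, length} encoding are made up for this example; no ORC types involved):

long splitStart = 100, splitEnd = 200;                            // split covers [100, 200)
long[][] stripes = { {40, 60}, {100, 80}, {180, 90}, {270, 50} }; // {offset, length}
long readStart = Long.MAX_VALUE, readEnd = Long.MIN_VALUE;
for (long[] s : stripes) {
    if (splitStart <= s[0] && s[0] < splitEnd) {   // stripe starts inside the split
        readStart = Math.min(readStart, s[0]);
        readEnd = Math.max(readEnd, s[0] + s[1]);
    }
}
// readStart == 100, readEnd == 270 -> offset 100, length 170.
// Stripe {180, 90} starts inside the split and is read in full even though it
// ends past splitEnd; stripe {40, 60} starts before the split and is left to
// the neighboring split.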
Preconditions.checkNotNull(split, "reopen() cannot be called on a null split.");
Preconditions.checkNotNull(state, "reopen() cannot be called with a null initial state.");
Preconditions.checkArgument(state == -1 || state >= split.getStart(),
    "Illegal offset " + state + ", smaller than the split's start=" + split.getStart());
// ... (reopening logic elided in the original snippet) ...
} else if (state > split.getStart()) {
    initBuffers();
/**
 * Checks if the expected input splits were created.
 */
@Test
public void checkInputSplits() throws IOException {
    FileInputSplit[] inputSplits = this.createInputFormat().createInputSplits(0);
    Arrays.sort(inputSplits, new InputSplitSorter());

    int splitIndex = 0;
    for (int fileIndex = 0; fileIndex < this.parallelism; fileIndex++) {
        // collect all consecutive splits that belong to the same file
        List<FileInputSplit> sameFileSplits = new ArrayList<FileInputSplit>();
        Path lastPath = inputSplits[splitIndex].getPath();
        for (; splitIndex < inputSplits.length; splitIndex++) {
            if (!inputSplits[splitIndex].getPath().equals(lastPath)) {
                break;
            }
            sameFileSplits.add(inputSplits[splitIndex]);
        }
        Assert.assertEquals(this.getExpectedBlockCount(fileIndex), sameFileSplits.size());

        // Each block of blockSize bytes carries getInfoSize() bytes of metadata,
        // so the payload per block is (blockSize - getInfoSize()); the last block
        // holds the remaining payload plus the metadata. E.g. with blockSize = 64,
        // infoSize = 8 and 150 raw bytes: 150 % 56 = 38, last block = 46 bytes.
        long lastBlockLength = this.rawDataSizes[fileIndex] % (this.blockSize - getInfoSize()) + getInfoSize();

        for (int index = 0; index < sameFileSplits.size(); index++) {
            Assert.assertEquals(this.blockSize * index, sameFileSplits.get(index).getStart());
            if (index < sameFileSplits.size() - 1) {
                Assert.assertEquals(this.blockSize, sameFileSplits.get(index).getLength());
            }
        }
        Assert.assertEquals(lastBlockLength, sameFileSplits.get(sameFileSplits.size() - 1).getLength());
    }
}
private TimestampedFileInputSplit getTimestampedSplit(long modTime, FileInputSplit split) {
    Preconditions.checkNotNull(split);
    return new TimestampedFileInputSplit(
        modTime, split.getSplitNumber(), split.getPath(),
        split.getStart(), split.getLength(), split.getHostnames());
}
for (FileInputSplit split : splits) {
    Assert.assertEquals(0L, split.getStart()); // always read from the beginning.
    if (split.getPath().getName().endsWith(".deflate")) {
        // unsplittable compressed files are also read from the beginning
        Assert.assertEquals(0L, split.getStart());
    } else {
        Assert.assertEquals(0L, split.getStart());
        Assert.assertTrue("split size not correct", split.getLength() > 0);
    }
}
assertEquals(inputSplit.getStart() + inputSplit.getLength(), offsetAtEndOfSplit[splitCounter]);
splitCounter++;
Assert.assertEquals(0, fis.getStart());
if (fis.getPath().toString().equals(tempFile1)) {
    numSplitsFile1++;
}
@Override
public BitcoinBlock nextRecord(BitcoinBlock reuse) throws IOException {
    BitcoinBlock dataBlock = null;
    // A negative split length means "unsplittable, read to the end of the
    // stream"; otherwise keep reading while still inside the split.
    if ((this.currentSplit.getLength() < 0)
            || (this.stream.getPos() <= this.currentSplit.getStart() + this.currentSplit.getLength())) {
        try {
            dataBlock = this.getBbr().readBlock();
        } catch (BitcoinBlockReadException e) {
            LOG.error(e);
        }
        if (dataBlock == null) {
            this.isEndReached = true;
        }
    } else {
        this.isEndReached = true;
    }
    return dataBlock;
}
@Override
public EthereumBlock nextRecord(EthereumBlock reuse) throws IOException {
    EthereumBlock dataBlock = null;
    // Same convention as above: a negative length marks an unsplittable split.
    if ((this.currentSplit.getLength() < 0)
            || (this.stream.getPos() <= this.currentSplit.getStart() + this.currentSplit.getLength())) {
        try {
            dataBlock = this.getEbr().readBlock();
        } catch (EthereumBlockReadException e) {
            LOG.error(e);
            // preserve the original exception as the cause instead of flattening it to a string
            throw new RuntimeException(e);
        }
        if (dataBlock == null) {
            this.isEndReached = true;
        }
    } else {
        this.isEndReached = true;
    }
    return dataBlock;
}
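Both nextRecord() variants above rely on the same convention: a negative split length marks an unsplittable (typically compressed) file that must be read to the end of the stream. A minimal sketch factoring that test out (the helper name withinSplit is invented):

private boolean withinSplit() throws IOException {
    // negative length: unsplittable, keep reading until EOF;
    // otherwise: keep reading while the stream position is still inside
    // [start, start + length]
    return this.currentSplit.getLength() < 0
        || this.stream.getPos() <= this.currentSplit.getStart() + this.currentSplit.getLength();
}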
protected RecordReader createReader(FileInputSplit fileSplit, TaskAttemptContext taskAttemptContext)
        throws IOException {
    // by default, we use org.apache.orc.mapreduce.OrcMapreduceRecordReader
    Configuration hadoopConf = taskAttemptContext.getConfiguration();
    org.apache.hadoop.fs.Path filePath = new org.apache.hadoop.fs.Path(fileSplit.getPath().toUri());
    Reader file = OrcFile.createReader(filePath,
        OrcFile.readerOptions(hadoopConf).maxLength(OrcConf.MAX_FILE_LENGTH.getLong(hadoopConf)));
    return new OrcMapreduceRecordReader<>(file,
        org.apache.orc.mapred.OrcInputFormat.buildOptions(
            hadoopConf, file, fileSplit.getStart(), fileSplit.getLength()));
}
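A hedged usage sketch of the reader created above, assuming the standard org.apache.hadoop.mapreduce.RecordReader contract (the wrapper method readAllRows is invented for illustration):

void readAllRows(FileInputSplit fileSplit, TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
    RecordReader reader = createReader(fileSplit, taskAttemptContext);
    try {
        while (reader.nextKeyValue()) {
            Object row = reader.getCurrentValue(); // one ORC row (an OrcStruct) per call
            // ... process row ...
        }
    } finally {
        reader.close();
    }
}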
private TimestampedFileInputSplit createTimestampedFileSplit(FileInputSplit split,
        long modificationTime, Serializable state) {
    TimestampedFileInputSplit timestampedSplit = new TimestampedFileInputSplit(
        modificationTime, split.getSplitNumber(), split.getPath(),
        split.getStart(), split.getLength(), split.getHostnames());
    if (state != null) {
        timestampedSplit.setSplitState(state);
    }
    return timestampedSplit;
}
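For context, the state attached here is the per-split read offset restored on recovery. A hedged sketch of the consuming side, assuming Flink's CheckpointableInputFormat#reopen and TimestampedFileInputSplit#getSplitState (types simplified, error handling omitted):

Serializable state = timestampedSplit.getSplitState();
if (state != null) {
    format.reopen(timestampedSplit, state); // resume from the checkpointed offset
} else {
    format.open(timestampedSplit);          // fresh split: start from the beginning
}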