// NOTE(review): fragment of a getSplits() variant that first strips index files
// and can fall back to a BAI-based split calculator. The enclosing try/loop/
// method braces open and close outside this view, and addProbabilisticSplits is
// invoked twice in a row below — verify against the complete file whether that
// duplication is intentional.
final List<InputSplit> origSplits = removeIndexFiles(splits);
i = addIndexedSplits (origSplits, i, newSplits, cfg);
} catch (IOException | ProviderNotFoundException e) {
if (cfg.getBoolean(ENABLE_BAI_SPLIT_CALCULATOR, false)) {
try {
// Prefer BAI-derived splits when the calculator is enabled.
i = addBAISplits (origSplits, i, newSplits, cfg);
} catch (IOException | ProviderNotFoundException e2) {
// Last resort: estimate BGZF block boundaries probabilistically.
i = addProbabilisticSplits (origSplits, i, newSplits, cfg);
i = addProbabilisticSplits (origSplits, i, newSplits, cfg);
return filterByInterval(newSplits, cfg);
/** * Converts a List of SimpleIntervals into the format required by the SamReader query API * @param rawIntervals SimpleIntervals to be converted * @return A sorted, merged list of QueryIntervals suitable for passing to the SamReader query API */ static QueryInterval[] prepareQueryIntervals( final List<Interval> rawIntervals, final SAMSequenceDictionary sequenceDictionary ) { if ( rawIntervals == null || rawIntervals.isEmpty() ) { return null; } // Convert each SimpleInterval to a QueryInterval final QueryInterval[] convertedIntervals = rawIntervals.stream() .map(rawInterval -> convertSimpleIntervalToQueryInterval(rawInterval, sequenceDictionary)) .toArray(QueryInterval[]::new); // Intervals must be optimized (sorted and merged) in order to use the htsjdk query API return QueryInterval.optimizeIntervals(convertedIntervals); } /**
/**
 * Creates a record writer targeting {@code output}. The {@code writeHeader}
 * flag is forwarded to init() — presumably it controls whether the SAM header
 * is emitted (TODO confirm in init()). When
 * {@code BAMOutputFormat.WRITE_SPLITTING_BAI} is set in the configuration, a
 * splitting BAM index is additionally written alongside the output.
 */
public BAMRecordWriter(
    Path output, SAMFileHeader header, boolean writeHeader,
    TaskAttemptContext ctx)
    throws IOException {
  init(
      output.getFileSystem(ctx.getConfiguration()).create(output),
      header, writeHeader);
  if (ctx.getConfiguration().getBoolean(BAMOutputFormat.WRITE_SPLITTING_BAI, false)) {
    // Splitting index location is derived from the output path via getIdxPath.
    Path splittingIndex = BAMInputFormat.getIdxPath(output);
    OutputStream splittingIndexOutput =
        output.getFileSystem(ctx.getConfiguration()).create(splittingIndex);
    splittingBAMIndexer = new SplittingBAMIndexer(splittingIndexOutput);
  }
}
// NOTE(review): the next constructor's header is truncated in this view.
public BAMRecordWriter(
public List<InputSplit> getSplits( List<InputSplit> splits, Configuration cfg) throws IOException { // Align the splits so that they don't cross blocks. // addIndexedSplits() requires the given splits to be sorted by file // path, so do so. Although FileInputFormat.getSplits() does, at the time // of writing this, generate them in that order, we shouldn't rely on it. splits.sort((a, b) -> { FileSplit fa = (FileSplit) a, fb = (FileSplit) b; return fa.getPath().compareTo(fb.getPath()); }); final List<InputSplit> newSplits = new ArrayList<>(splits.size()); for (int i = 0; i < splits.size();) { try { i = addIndexedSplits (splits, i, newSplits, cfg); } catch (IOException e) { i = addProbabilisticSplits(splits, i, newSplits, cfg); } } return filterByInterval(newSplits, cfg); }
// NOTE(review): fragment — statements from a composite getSplits() that merges
// BAM and CRAM splits after removing index files; the enclosing method and the
// declarations of newSplits/split/bamOrigSplits/cramOrigSplits are outside
// this view.
BAMInputFormat.removeIndexFiles(super.getSplits(job));
newSplits.add(split);
newSplits.addAll(bamIF.getSplits(bamOrigSplits, job.getConfiguration()));
newSplits.addAll(cramIF.getSplits(cramOrigSplits, job.getConfiguration()));
return newSplits;
/**
 * Regression test for the scenario its name describes: a BAM written with a
 * large header should still produce exactly one input split.
 */
@Test
public void testNoReadsInFirstSplitBug() throws Exception {
  input = BAMTestUtil.writeBamFileWithLargeHeader().getAbsolutePath();
  completeSetup();

  final BAMInputFormat format = new BAMInputFormat();
  final List<InputSplit> computedSplits = format.getSplits(jobContext);
  assertEquals(1, computedSplits.size());
}
/**
 * With the BAI split calculator enabled and a 40 KB max split size, the
 * 1000-read coordinate-sorted test BAM must yield three splits containing
 * 1080, 524 and 398 records respectively.
 */
@Test
public void testMultipleSplitsBaiEnabled() throws Exception {
  input = BAMTestUtil.writeBamFile(1000, SAMFileHeader.SortOrder.coordinate)
      .getAbsolutePath();
  completeSetup();
  BAMInputFormat.setEnableBAISplitCalculator(jobContext.getConfiguration(), true);
  jobContext.getConfiguration().setInt(FileInputFormat.SPLIT_MAXSIZE, 40000);

  final BAMInputFormat format = new BAMInputFormat();
  final List<InputSplit> computedSplits = format.getSplits(jobContext);
  assertEquals(3, computedSplits.size());

  // Per-split record counts pin down exactly where the cuts were placed.
  final int[] expectedCounts = {1080, 524, 398};
  for (int s = 0; s < expectedCounts.length; s++) {
    assertEquals(expectedCounts[s],
        getSAMRecordsFromSplit(format, computedSplits.get(s)).size());
  }
}
// NOTE(review): fragment — reader-setup code; the enclosing method and the
// body of the trailing else-if are outside this view. useIntelInflater is not
// used within the visible span — presumably consumed later; confirm.
boolean useIntelInflater = BAMInputFormat.useIntelInflater(conf);
boolean boundedTraversal = BAMInputFormat.isBoundedTraversal(conf);
// Bounded traversal with precomputed file pointers: iterate only over the
// regions covered by the configured intervals.
if (boundedTraversal && split.getIntervalFilePointers() != null) {
  List<Interval> intervals = BAMInputFormat.getIntervals(conf);
  QueryInterval[] queryIntervals = BAMInputFormat.prepareQueryIntervals(intervals,
      header.getSequenceDictionary());
  iterator = bamFileReader.createIndexIterator(queryIntervals, false,
      split.getIntervalFilePointers());
} else if (boundedTraversal && split.getIntervalFilePointers() == null) {
// NOTE(review): this span looks garbled by extraction — "return splits;" is
// immediately followed by a dangling ".setUseAsyncIo(false);" chained call
// whose receiver is not visible, and the method body is clearly incomplete.
// Verify against the complete file before changing anything here.
private List<InputSplit> filterByInterval(List<InputSplit> splits, Configuration conf)
    throws IOException {
  // Nothing to filter when no interval-bounded traversal was requested.
  if (!isBoundedTraversal(conf)) {
    return splits;
.setUseAsyncIo(false);
List<Interval> intervals = getIntervals(conf);
boolean traverseUnplacedUnmapped = traverseUnplacedUnmapped(conf);
QueryInterval[] queryIntervals = prepareQueryIntervals(intervals, dict);
fileToSpan.put(bamFile, BAMFileReader.getFileSpan(queryIntervals, idx));
// NOTE(review): fragment — tail of a larger call (note the unbalanced
// parentheses) that opens the splitting index for the current path, followed
// by a fallback to probabilistic splits; enclosing method not visible.
path.getFileSystem(cfg).open(getIdxPath(path)));
return addProbabilisticSplits(splits, i, newSplits, cfg);
// NOTE(review): fragment — arms of a format-dispatch switch; the switch header
// and enclosing method are outside this view.
case BAM:
  return bamIF.createRecordReader(split, ctx);
case CRAM:
  return cramIF.createRecordReader(split, ctx);
default:
  // Presumably unreachable for known formats; the assert documents that.
  assert false;
  return null;
// NOTE(review): fragment — chooses an interval-bounded index iterator when
// intervals are configured; the enclosing method and the else-branch body are
// outside this view.
List<Interval> intervals = BAMInputFormat.getIntervals(conf);
if (intervals != null) {
  QueryInterval[] queryIntervals = BAMInputFormat.prepareQueryIntervals(intervals,
      header.getSequenceDictionary());
  iterator = bamFileReader.createIndexIterator(queryIntervals, false,
      split.getIntervalFilePointers());
} else {
// NOTE(review): fragment — merges BAM and CRAM splits into a combined result;
// the enclosing method and the declarations of newSplits/split/bamOrigSplits/
// cramOrigSplits are outside this view.
newSplits.add(split);
newSplits.addAll(bamIF.getSplits(bamOrigSplits, job.getConfiguration()));
newSplits.addAll(cramIF.getSplits(cramOrigSplits, job.getConfiguration()));
return newSplits;
/**
 * Defers to {@link BAMInputFormat}, {@link CRAMInputFormat}, or
 * {@link SAMInputFormat} as appropriate for the given path; falls back to the
 * superclass when the format cannot be determined or the path is missing.
 */
@Override
public boolean isSplitable(JobContext job, Path path) {
  if (this.conf == null) {
    this.conf = job.getConfiguration();
  }
  try {
    final SAMFormat detected = getFormat(path);
    if (detected == null) {
      // Unknown format: let the generic FileInputFormat policy decide.
      return super.isSplitable(job, path);
    }
    switch (detected) {
      case SAM:
        return samIF.isSplitable(job, path);
      case BAM:
        return bamIF.isSplitable(job, path);
      case CRAM:
        return cramIF.isSplitable(job, path);
      default:
        assert false;
        return false;
    }
  } catch (PathNotFoundException e) {
    // Path does not exist: defer to the superclass rather than failing here.
    return super.isSplitable(job, path);
  }
}
/**
 * Bounded traversal configured with no intervals and the second setup flag
 * enabled (presumably unplaced/unmapped traversal — see
 * completeSetupWithBoundedTraversal): the 1000-read BAM should yield a single
 * split containing exactly 2 records.
 */
@Test
public void testUnmapped() throws Exception {
  input = BAMTestUtil.writeBamFile(1000, SAMFileHeader.SortOrder.coordinate)
      .getAbsolutePath();
  completeSetupWithBoundedTraversal(null, true);
  jobContext.getConfiguration().setInt(FileInputFormat.SPLIT_MAXSIZE, 40000);

  final BAMInputFormat format = new BAMInputFormat();
  final List<InputSplit> computedSplits = format.getSplits(jobContext);
  assertEquals(1, computedSplits.size());
  assertEquals(2, getSAMRecordsFromSplit(format, computedSplits.get(0)).size());
}
/**
 * Same scenario as the non-suffix BAI test, but with the .bai index stored
 * under the suffix naming scheme ("file.bam.bai" instead of "file.bai"):
 * renames the generated index and expects the same three splits with the
 * same record counts.
 */
@Test
public void testMultipleSplitsBaiEnabledSuffixPath() throws Exception {
  input = BAMTestUtil.writeBamFile(1000, SAMFileHeader.SortOrder.coordinate)
      .getAbsolutePath();
  // Move the index from "<name>.bai" to "<name>.bam.bai". File.renameTo()
  // reports failure only via its return value, so it must be checked: a
  // silently failed rename would leave the original index in place and this
  // test would no longer exercise the suffix-path lookup at all.
  File index = new File(input.replaceFirst("\\.bam$", BAMIndex.BAMIndexSuffix));
  File suffixedIndex = new File(input + BAMIndex.BAMIndexSuffix);
  if (!index.renameTo(suffixedIndex)) {
    throw new IllegalStateException(
        "Failed to rename " + index + " to " + suffixedIndex);
  }
  completeSetup();
  BAMInputFormat.setEnableBAISplitCalculator(jobContext.getConfiguration(), true);
  jobContext.getConfiguration().setInt(FileInputFormat.SPLIT_MAXSIZE, 40000);
  BAMInputFormat inputFormat = new BAMInputFormat();
  List<InputSplit> splits = inputFormat.getSplits(jobContext);
  assertEquals(3, splits.size());
  List<SAMRecord> split0Records = getSAMRecordsFromSplit(inputFormat, splits.get(0));
  List<SAMRecord> split1Records = getSAMRecordsFromSplit(inputFormat, splits.get(1));
  List<SAMRecord> split2Records = getSAMRecordsFromSplit(inputFormat, splits.get(2));
  assertEquals(1080, split0Records.size());
  assertEquals(524, split1Records.size());
  assertEquals(398, split2Records.size());
}
// NOTE(review): fragment (appears to duplicate an earlier span in this file) —
// reader-setup code; the enclosing method and the body of the trailing else-if
// are outside this view. useIntelInflater is not used within the visible span.
boolean useIntelInflater = BAMInputFormat.useIntelInflater(conf);
boolean boundedTraversal = BAMInputFormat.isBoundedTraversal(conf);
// Bounded traversal with precomputed file pointers: iterate only over the
// regions covered by the configured intervals.
if (boundedTraversal && split.getIntervalFilePointers() != null) {
  List<Interval> intervals = BAMInputFormat.getIntervals(conf);
  QueryInterval[] queryIntervals = BAMInputFormat.prepareQueryIntervals(intervals,
      header.getSequenceDictionary());
  iterator = bamFileReader.createIndexIterator(queryIntervals, false,
      split.getIntervalFilePointers());
} else if (boundedTraversal && split.getIntervalFilePointers() == null) {
// NOTE(review): this span looks garbled by extraction (and appears to
// duplicate an earlier span) — "return splits;" is immediately followed by a
// dangling ".setUseAsyncIo(false);" whose receiver is not visible, and the
// method body is clearly incomplete. Verify against the complete file.
private List<InputSplit> filterByInterval(List<InputSplit> splits, Configuration conf)
    throws IOException {
  // Nothing to filter when no interval-bounded traversal was requested.
  if (!isBoundedTraversal(conf)) {
    return splits;
.setUseAsyncIo(false);
List<Interval> intervals = getIntervals(conf);
boolean traverseUnplacedUnmapped = traverseUnplacedUnmapped(conf);
QueryInterval[] queryIntervals = prepareQueryIntervals(intervals, dict);
fileToSpan.put(bamFile, BAMFileReader.getFileSpan(queryIntervals, idx));
// NOTE(review): fragment — tail of a larger call (note the unbalanced
// parentheses) that opens the splitting index for the current file, followed
// by a fallback to probabilistic splits; enclosing method not visible.
file.getFileSystem(cfg).open(getIdxPath(file)));
return addProbabilisticSplits(splits, i, newSplits, cfg);
// NOTE(review): fragment (appears to duplicate an earlier span) — arms of a
// format-dispatch switch; the switch header and enclosing method are outside
// this view.
case BAM:
  return bamIF.createRecordReader(split, ctx);
case CRAM:
  return cramIF.createRecordReader(split, ctx);
default:
  // Presumably unreachable for known formats; the assert documents that.
  assert false;
  return null;