htsjdk.samtools.reference.ReferenceSequenceFile java code examples

/**
 * Updates the cache to include the new contig
 * @param contig
 */
private synchronized ReferenceSequence addToCache(String contig) {
  ReferenceSequence seq = cache.get(contig);
  if (seq != null) {
    // already populated by another thread while we were waiting to enter
    // this synchronized block
    return seq;
  }
  log.debug("Caching reference genome contig ", contig);
  seq = underlying.getSequence(contig);
  cache = ImmutableMap.<String, ReferenceSequence>builder()
      .putAll(cache)
      .put(contig, seq)
      .build();
  referenceIndexLookup[underlying.getSequenceDictionary().getSequence(contig).getSequenceIndex()] = seq;
  return seq;
}
@Override

public ReferenceSequence queryReferenceSequence(String chromosome, int start, int end) {
  ReferenceSequence referenceSequence = indexedFastaSequenceFile.getSubsequenceAt(chromosome, start, end);
  return referenceSequence;
}

@Override
public ReferenceSequence nextSequence() {
  return underlying.nextSequence();
}
@Override

public static byte[] getBasesFromReferenceFile(String referenceFilePath, String seqName, int from, int length) {
  ReferenceSequenceFile referenceSequenceFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(new File(
      referenceFilePath));
  ReferenceSequence sequence = referenceSequenceFile.getSequence(seqName);
  byte[] bases = referenceSequenceFile.getSubsequenceAt(sequence.getName(), from, from + length).getBases();
  return bases;
}

byte[] findBasesByName(final String name, final boolean tryVariants) {
  if (rsFile == null || !rsFile.isIndexed())
    return null;
  ReferenceSequence sequence = null;
  try {
    sequence = rsFile.getSequence(name);
  } catch (final SAMException e) {
    // the only way to test if rsFile contains the sequence is to try and catch exception.
  }
  if (sequence != null)
    return sequence.getBases();
  if (tryVariants) {
    for (final String variant : getVariants(name)) {
      try {
        sequence = rsFile.getSequence(variant);
      } catch (final SAMException e) {
        log.warn("Sequence not found: " + variant);
      }
      if (sequence != null)
        return sequence.getBases();
    }
  }
  return null;
}

/**
 * Get reference sequence without validating name or length.  This is OK if the entire sequence
 * dictionary was validated before reading sequences.
 */
public ReferenceSequence get(final int sequenceIndex) {
  if (referenceSequence != null && referenceSequence.getContigIndex() == sequenceIndex) {
    return referenceSequence;
  }
  if (referenceSequence != null && referenceSequence.getContigIndex() > sequenceIndex) {
    throw new SAMException("Requesting earlier reference sequence: " + sequenceIndex + " < " +
    referenceSequence.getContigIndex());
  }
  referenceSequence = null;
  if(referenceSequenceFile.isIndexed() && referenceSequenceFile.getSequenceDictionary() != null) {
    final SAMSequenceRecord samSequenceRecord = referenceSequenceFile.getSequenceDictionary().getSequence(sequenceIndex);
    if(samSequenceRecord != null) {
      referenceSequence = referenceSequenceFile.getSequence(samSequenceRecord.getSequenceName()) ;
    } // else referenceSequence will remain null
  } else {
    do {
      referenceSequence = referenceSequenceFile.nextSequence();
    }
    while (referenceSequence != null && referenceSequence.getContigIndex() < sequenceIndex);
  }
  if (referenceSequence == null || referenceSequence.getContigIndex() != sequenceIndex) {
    throw new SAMException("Reference sequence (" + sequenceIndex +
        ") not found in " + referenceSequenceFile.toString());
  }
  return referenceSequence;
}

/**
 * set our internal reference contig order
 * @param refFile the reference file
 */
@Requires("refFile != null")
public GenomeLocParser(final ReferenceSequenceFile refFile) {
  this(refFile.getSequenceDictionary());
}

/**
 * Get the reference bases from referenceReader spanned by the extended location of this active region,
 * including additional padding bp on either side.  If this expanded region would exceed the boundaries
 * of the active region's contig, the returned result will be truncated to only include on-genome reference
 * bases
 * @param referenceReader the source of the reference genome bases
 * @param padding the padding, in BP, we want to add to either side of this active region extended region
 * @param genomeLoc a non-null genome loc indicating the base span of the bp we'd like to get the reference for
 * @return a non-null array of bytes holding the reference bases in referenceReader
 */
@Ensures("result != null")
public byte[] getReference( final ReferenceSequenceFile referenceReader, final int padding, final GenomeLoc genomeLoc ) {
  if ( referenceReader == null ) throw new IllegalArgumentException("referenceReader cannot be null");
  if ( padding < 0 ) throw new IllegalArgumentException("padding must be a positive integer but got " + padding);
  if ( genomeLoc == null ) throw new IllegalArgumentException("genomeLoc cannot be null");
  if ( genomeLoc.size() == 0 ) throw new IllegalArgumentException("GenomeLoc must have size > 0 but got " + genomeLoc);
  final byte[] reference =  referenceReader.getSubsequenceAt( genomeLoc.getContig(),
      Math.max(1, genomeLoc.getStart() - padding),
      Math.min(referenceReader.getSequenceDictionary().getSequence(genomeLoc.getContig()).getSequenceLength(), genomeLoc.getStop() + padding) ).getBases();
  return reference;
}

@Override
protected int doWork() {
  IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);
  IOUtil.assertFileIsWritable(OUTPUT);
  final ReferenceSequenceFile refFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(REFERENCE_SEQUENCE, true);
  if (!refFile.isIndexed()) {
    throw new IllegalStateException("Reference file must be indexed, but no index file was found");
  }
  if (refFile.getSequenceDictionary() == null) {
    throw new IllegalStateException("Reference file must include a dictionary, but no dictionary file was found");
  }
  // get the intervals
  final IntervalList intervals = segregateReference(refFile, MAX_TO_MERGE);
  log.info(String.format("Found %d intervals in %d loci during %s seconds", intervalProgress.getCount(), locusProgress.getCount(), locusProgress.getElapsedSeconds()));
  /**********************************
   * Now output regions for calling *
   **********************************/
  final IntervalList outputIntervals = new IntervalList(intervals.getHeader().clone());
  log.info(String.format("Collecting requested type of intervals (%s)", OUTPUT_TYPE));
  intervals.getIntervals().stream().filter(i -> OUTPUT_TYPE.accepts(i.getName())).forEach(outputIntervals::add);
  log.info("Writing Intervals.");
  outputIntervals.write(OUTPUT);
  log.info(String.format("Execution ending. Total time %d seconds", locusProgress.getElapsedSeconds()));
  return 0;
}

/**
 * Ensures that a sequence dictionary exists for the given reference
 * @param referenceFile reference genome fasta
 * @param invoking program
 */
public static void ensureSequenceDictionary(File referenceFile, CommandLineProgram program) {
  try {
    ReferenceSequenceFile rsf = new FastaSequenceFile(referenceFile, false);
    Path path = referenceFile.toPath().toAbsolutePath();
    if (rsf.getSequenceDictionary() == null) {
      log.info("Attempting to create sequence dictionary for " + referenceFile);
      Path dictPath = path.resolveSibling(path.getFileName().toString() + htsjdk.samtools.util.IOUtil.DICT_FILE_EXTENSION);
      picard.sam.CreateSequenceDictionary csd = new picard.sam.CreateSequenceDictionary();
      if (program != null) {
        CommandLineProgramHelper.copyInputs(program, csd);
      }
      csd.instanceMain(new String[] {
        "OUTPUT=" + dictPath.toFile(),
        "R=" + referenceFile.getPath()
      });
    }
    rsf.close();
  } catch (Exception e) {
    log.error("Sequence dictionary creation failed. Please create using picard CreateSequenceDictionary.", e);
  }
}
public void setReference(ReferenceLookup ref) {

@Override
public synchronized ReferenceSequence getSequence(String contig) {
  if (isClosed) throw new IllegalStateException("Underlying reference has closed");
  return underlying.getSequence(contig);
}

/**
 * Checks if the set FASTA file is indexed
 * @return
 */
public Boolean hasIndex() {
  Boolean hasIndex = false;
  if (this.indexedFastaSequenceFile != null) {
    hasIndex = this.indexedFastaSequenceFile.isIndexed();
  }
  return hasIndex;
}

  @Override
  public void close() throws IOException {
    referenceSequenceFile.close();
  }
}

/**
 * Get reference sequence without validating name or length.  This is OK if the entire sequence
 * dictionary was validated before reading sequences.
 */
public ReferenceSequence get(final int sequenceIndex) {
  if (referenceSequence != null && referenceSequence.getContigIndex() == sequenceIndex) {
    return referenceSequence;
  }
  if (referenceSequence != null && referenceSequence.getContigIndex() > sequenceIndex) {
    throw new SAMException("Requesting earlier reference sequence: " + sequenceIndex + " < " +
    referenceSequence.getContigIndex());
  }
  referenceSequence = null;
  if(referenceSequenceFile.isIndexed() && referenceSequenceFile.getSequenceDictionary() != null) {
    final SAMSequenceRecord samSequenceRecord = referenceSequenceFile.getSequenceDictionary().getSequence(sequenceIndex);
    if(samSequenceRecord != null) {
      referenceSequence = referenceSequenceFile.getSequence(samSequenceRecord.getSequenceName()) ;
    } // else referenceSequence will remain null
  } else {
    do {
      referenceSequence = referenceSequenceFile.nextSequence();
    }
    while (referenceSequence != null && referenceSequence.getContigIndex() < sequenceIndex);
  }
  if (referenceSequence == null || referenceSequence.getContigIndex() != sequenceIndex) {
    throw new SAMException("Reference sequence (" + sequenceIndex +
        ") not found in " + referenceSequenceFile.toString());
  }
  return referenceSequence;
}

  @Override
  SAMSequenceDictionary extractDictionary(Path reference) {
    final SAMSequenceDictionary dict = ReferenceSequenceFileFactory.getReferenceSequenceFile(reference).getSequenceDictionary();
    if (dict == null)
      throw new SAMException("Could not find dictionary next to reference file " + reference.toUri().toString());
    return dict;
  }
},

  protected byte[] getReferenceBases( GenomeLoc genomeLoc ) {
    SAMSequenceRecord sequenceInfo = reference.getSequenceDictionary().getSequence(genomeLoc.getContig());

    long start = genomeLoc.getStart();
    long stop = Math.min( genomeLoc.getStop(), sequenceInfo.getSequenceLength() );

    // Read with no aligned bases?  Return an empty array.
    if(stop - start + 1 == 0)
      return new byte[0];

    ReferenceSequence subsequence = reference.getSubsequenceAt(genomeLoc.getContig(), start, stop);

    int overhang = (int)(genomeLoc.getStop() - stop);
    if ( overhang > 0 ) {
      if ( overhang > BUFFER ) // todo -- this is a bit dangerous
        throw new ReviewedGATKException("Insufficient buffer size for Xs overhanging genome -- expand BUFFER");
      byte[] all = new byte[subsequence.getBases().length + overhang];
      System.arraycopy(subsequence.getBases(), 0, all, 0, subsequence.getBases().length);
      System.arraycopy(Xs, 0, all, subsequence.getBases().length, overhang);
      return all;
    } else {
      // fast path
      return subsequence.getBases();
    }
  }
}

private String fetchBaseString(final ReferenceSequenceFile reader, final String contig, final int start, final int stop) {
  if ( start == -1 )
    return new String(reader.getSequence(contig).getBases());
  else
    return new String(reader.getSubsequenceAt(contig, start, stop).getBases());
}

byte[] findBasesByName(final String name, final boolean tryVariants) {
  if (rsFile == null || !rsFile.isIndexed())
    return null;
  ReferenceSequence sequence = null;
  try {
    sequence = rsFile.getSequence(name);
  } catch (final SAMException e) {
    // the only way to test if rsFile contains the sequence is to try and catch exception.
  }
  if (sequence != null)
    return sequence.getBases();
  if (tryVariants) {
    for (final String variant : getVariants(name)) {
      try {
        sequence = rsFile.getSequence(variant);
      } catch (final SAMException e) {
        log.warn("Sequence not found: " + variant);
      }
      if (sequence != null)
        return sequence.getBases();
    }
  }
  return null;
}

@Override
protected int doWork() {
  IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);
  IOUtil.assertFileIsWritable(OUTPUT);
  final ReferenceSequenceFile refFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(REFERENCE_SEQUENCE, true);
  if (!refFile.isIndexed()) {
    throw new IllegalStateException("Reference file must be indexed, but no index file was found");
  }
  if (refFile.getSequenceDictionary() == null) {
    throw new IllegalStateException("Reference file must include a dictionary, but no dictionary file was found");
  }
  // get the intervals
  final IntervalList intervals = segregateReference(refFile, MAX_TO_MERGE);
  log.info(String.format("Found %d intervals in %d loci during %s seconds", intervalProgress.getCount(), locusProgress.getCount(), locusProgress.getElapsedSeconds()));
  /**********************************
   * Now output regions for calling *
   **********************************/
  final IntervalList outputIntervals = new IntervalList(intervals.getHeader().clone());
  log.info(String.format("Collecting requested type of intervals (%s)", OUTPUT_TYPE));
  intervals.getIntervals().stream().filter(i -> OUTPUT_TYPE.accepts(i.getName())).forEach(outputIntervals::add);
  log.info("Writing Intervals.");
  outputIntervals.write(OUTPUT);
  log.info(String.format("Execution ending. Total time %d seconds", locusProgress.getElapsedSeconds()));
  return 0;
}

public static void checkRefMD5(SAMSequenceDictionary d, ReferenceSequenceFile refFile, boolean checkExistingMD5,
    boolean failIfMD5Mismatch) throws NoSuchAlgorithmException {
  for (SAMSequenceRecord r : d.getSequences()) {
    ReferenceSequence sequence = refFile.getSequence(r.getSequenceName());
    if (!r.getAttributes().contains(SAMSequenceRecord.MD5_TAG)) {
      String md5 = calculateMD5String(sequence.getBases());
      r.setAttribute(SAMSequenceRecord.MD5_TAG, md5);
    } else {
      if (checkExistingMD5) {
        String existingMD5 = r.getAttribute(SAMSequenceRecord.MD5_TAG);
        String md5 = calculateMD5String(sequence.getBases());
        if (!md5.equals(existingMD5)) {
          String message = String.format("For sequence %s the md5 %s does not match the actual md5 %s.",
              r.getSequenceName(), existingMD5, md5);
          if (failIfMD5Mismatch)
            throw new RuntimeException(message);
          else
            log.warn(message);
        }
      }
    }
  }
}

Javadoc

An interface for working with files of reference sequences regardless of the file format being used.

Most used methods

getSequenceDictionary
Must return a sequence dictionary with at least the following fields completed for each sequence: na
getSubsequenceAt
Gets the subsequence of the contig in the range [start,stop]
getSequence
Retrieves the complete sequence described by this contig.
nextSequence
Retrieves the next whole sequences from the file.
isIndexed
close
toString
reset
Resets the ReferenceSequenceFile so that the next call to nextSequence() will return the first seque

Popular in Java

Start an intent from android
getSystemService (Context)
setRequestProperty (URLConnection)
onRequestPermissionsResult (Fragment)
ConnectException (java.net)
A ConnectException is thrown if a connection cannot be established to a remote host on a specific po
PriorityQueue (java.util)
A PriorityQueue holds elements on a priority heap, which orders the elements according to their natu
TimeZone (java.util)
TimeZone represents a time zone offset, and also figures out daylight savings. Typically, you get a
Semaphore (java.util.concurrent)
A counting semaphore. Conceptually, a semaphore maintains a set of permits. Each #acquire blocks if
AtomicInteger (java.util.concurrent.atomic)
An int value that may be updated atomically. See the java.util.concurrent.atomic package specificati
Handler (java.util.logging)
A Handler object accepts a logging request and exports the desired messages to a target, for example
Top PhpStorm plugins

How to useReferenceSequenceFile in htsjdk.samtools.reference

Best Java code snippets using htsjdk.samtools.reference.ReferenceSequenceFile (Showing top 20 results out of 315)

How to use
ReferenceSequenceFile
in
htsjdk.samtools.reference