/**
 * Checks whether this header declares any genotype sample columns.
 *
 * @return true if at least one genotype sample is present, false otherwise
 */
public boolean hasGenotypingData() {
    final int sampleCount = getNGenotypeSamples();
    return sampleCount > 0;
}
/**
 * Do we have genotyping data?
 *
 * <p>Delegates to {@link #getNGenotypeSamples()}; any positive sample count
 * means genotype (FORMAT) columns are present in this header.
 *
 * @return true if we have genotyping columns, false otherwise
 */
public boolean hasGenotypingData() { return getNGenotypeSamples() > 0; }
/**
 * Reports whether this header carries genotype columns.
 *
 * @return true if we have genotyping columns, false otherwise
 */
public boolean hasGenotypingData() {
    return 0 < getNGenotypeSamples();
}
// NOTE(review): this snippet is truncated — the PicardException message
// concatenation continues past the visible span, so only comments are added here.
// Reads the VCF header from variantPath and enforces that the input contains
// exactly one genotype sample, failing fast with a PicardException otherwise.
final VCFHeader header = getHeaderFromPath(variantPath);
final int numberOfSamples = header.getNGenotypeSamples();
if (numberOfSamples != 1) {
    throw new PicardException("Input: " + variantPathString + " was expected to contain a single sample" +
/**
 * Installs the VCF header used for decoding and (re)builds all per-sample
 * state: the genotype field decoders, the string dictionary, and one
 * GenotypeBuilder per sample in header order.
 */
@Override
public void setHeader(VCFHeader header) {
    genoFieldDecoders = new BCF2GenotypeFieldDecoders(header);
    fieldDict = BCF2Utils.makeDictionary(header);
    sampleNamesInOrder = header.getSampleNamesInOrder();
    sampleNameToOffset = header.getSampleNameToOffset();

    final List<String> sampleNames = header.getGenotypeSamples();
    builders = new GenotypeBuilder[header.getNGenotypeSamples()];
    for (int idx = 0; idx < builders.length; idx++) {
        builders[idx] = new GenotypeBuilder(sampleNames.get(idx));
    }
}
/**
 * Builds a split guesser over the given seekable BCF stream.
 *
 * Detects BGZF compression, then parses the VCF header from the (possibly
 * decompressed) stream, caching the contig dictionary size and genotype
 * sample count for later use when guessing record boundaries.
 *
 * NOTE(review): the headerStream parameter is never read — the header is
 * parsed from ss instead. Confirm whether headerStream should be used here.
 *
 * @throws IOException if the stream cannot be read or the header is malformed
 */
public BCFSplitGuesser(SeekableStream ss, InputStream headerStream) throws IOException {
    inFile = ss;
    InputStream bInFile = new BufferedInputStream(inFile);
    bgzf = BlockCompressedInputStream.isValidFile(bInFile);
    if (bgzf)
        bInFile = new BlockCompressedInputStream(bInFile);
    // Excess buffering here but it can't be helped that BCF2Codec only takes
    // PositionalBufferedStream.
    final VCFHeader header = (VCFHeader)bcfCodec.readHeader(
        new PositionalBufferedStream(bInFile)).getHeaderValue();
    contigDictionaryLength = header.getContigLines().size();
    genotypeSampleCount = header.getNGenotypeSamples();
}
/**
 * Construct a split guesser for a BCF input: detect BGZF framing, read the
 * header, and cache the contig-dictionary size and genotype sample count.
 *
 * @throws IOException if reading or header parsing fails
 */
public BCFSplitGuesser(SeekableStream ss, InputStream headerStream) throws IOException {
    inFile = ss;

    InputStream buffered = new BufferedInputStream(inFile);
    bgzf = BlockCompressedInputStream.isValidFile(buffered);
    if (bgzf) {
        buffered = new BlockCompressedInputStream(buffered);
    }

    // BCF2Codec accepts only PositionalBufferedStream, hence the double buffering.
    final PositionalBufferedStream positional = new PositionalBufferedStream(buffered);
    final VCFHeader vcfHeader = (VCFHeader) bcfCodec.readHeader(positional).getHeaderValue();

    contigDictionaryLength = vcfHeader.getContigLines().size();
    genotypeSampleCount = vcfHeader.getNGenotypeSamples();
}
/**
 * Creates the guesser from a seekable BCF stream: probes for BGZF framing,
 * decodes the VCF header, and records contig-line count plus genotype sample
 * count needed for split guessing.
 *
 * NOTE(review): headerStream is accepted but unused here — header data is read
 * from ss; verify against callers whether that is intentional.
 *
 * @throws IOException on read or header-parse failure
 */
public BCFSplitGuesser(SeekableStream ss, InputStream headerStream) throws IOException {
    inFile = ss;
    InputStream bInFile = new BufferedInputStream(inFile);
    bgzf = BlockCompressedInputStream.isValidFile(bInFile);
    if (bgzf)
        bInFile = new BlockCompressedInputStream(bInFile);
    // Excess buffering here but it can't be helped that BCF2Codec only takes
    // PositionalBufferedStream.
    final VCFHeader header = (VCFHeader)bcfCodec.readHeader(
        new PositionalBufferedStream(bInFile)).getHeaderValue();
    contigDictionaryLength = header.getContigLines().size();
    genotypeSampleCount = header.getNGenotypeSamples();
}
/**
 * Installs the header used for BCF decoding and rebuilds per-sample state:
 * genotype field decoders, the string dictionary, one GenotypeBuilder per
 * sample (in header order), plus cached sorted sample names and offsets.
 */
@Override
public void setHeader(VCFHeader header) {
    genoFieldDecoders = new BCF2GenotypeFieldDecoders(header);
    fieldDict = BCF2Utils.makeDictionary(header);
    builders = new GenotypeBuilder[header.getNGenotypeSamples()];
    final List<String> genotypeSamples = header.getGenotypeSamples();
    // one reusable builder per sample, indexed in header order
    for (int i = 0; i < builders.length; ++i)
        builders[i] = new GenotypeBuilder(genotypeSamples.get(i));
    sampleNamesInOrder = header.getSampleNamesInOrder();
    sampleNameToOffset = header.getSampleNameToOffset();
}
/**
 * Replaces the decoding header and regenerates every piece of
 * sample-derived state from it.
 */
@Override
public void setHeader(VCFHeader header) {
    genoFieldDecoders = new BCF2GenotypeFieldDecoders(header);
    fieldDict = BCF2Utils.makeDictionary(header);

    final List<String> sampleNames = header.getGenotypeSamples();
    final GenotypeBuilder[] freshBuilders = new GenotypeBuilder[header.getNGenotypeSamples()];
    for (int sampleIndex = 0; sampleIndex < freshBuilders.length; sampleIndex++) {
        freshBuilders[sampleIndex] = new GenotypeBuilder(sampleNames.get(sampleIndex));
    }
    builders = freshBuilders;

    sampleNamesInOrder = header.getSampleNamesInOrder();
    sampleNameToOffset = header.getSampleNameToOffset();
}
/**
 * Verifies single-sample sample-name remapping in VCFCodec: the remapped name
 * must appear both in the parsed header and in each of the first ten decoded
 * records' genotypes.
 *
 * Fix: the FileInputStream/iterator were previously leaked; the iterator is a
 * CloseableIterator, so try-with-resources now closes it (and the stream).
 */
@Test
public void testVCFHeaderSampleRenamingSingleSampleVCF() throws Exception {
    final VCFCodec codec = new VCFCodec();
    codec.setRemappedSampleName("FOOSAMPLE");
    try (final AsciiLineReaderIterator vcfIterator =
             new AsciiLineReaderIterator(AsciiLineReader.from(new FileInputStream(variantTestDataRoot + "HiSeq.10000.vcf")))) {
        final VCFHeader header = (VCFHeader) codec.readHeader(vcfIterator).getHeaderValue();

        Assert.assertEquals(header.getNGenotypeSamples(), 1, "Wrong number of samples in remapped header");
        Assert.assertEquals(header.getGenotypeSamples().get(0), "FOOSAMPLE", "Sample name in remapped header has incorrect value");

        int recordCount = 0;
        while (vcfIterator.hasNext() && recordCount < 10) {
            recordCount++;
            final VariantContext vcfRecord = codec.decode(vcfIterator.next());
            Assert.assertEquals(vcfRecord.getSampleNames().size(), 1, "Wrong number of samples in vcf record after remapping");
            Assert.assertEquals(vcfRecord.getSampleNames().iterator().next(), "FOOSAMPLE", "Wrong sample in vcf record after remapping");
        }
    }
}
/**
 * Decode the site-level (non-genotype) data for the next record from this
 * codec's decoder and load it into the builder.
 *
 * The two packed INT32s follow the BCF2 layout: the first packs n_allele in
 * the high 16 bits and n_info in the low 16 bits; the second packs n_fmt in
 * the high 8 bits and n_sample in the low 24 bits.
 *
 * Fixes: the n_sample mask was 0x00FFFFF (only 20 bits, a dropped hex digit),
 * silently truncating sample counts >= 2^20; shifts on the packed high fields
 * are now unsigned so a set sign bit cannot produce negative counts.
 *
 * @param builder builder receiving the decoded site fields
 * @return the sites info needed later to decode the genotype block
 * @throws IOException if the underlying decoder fails
 */
private final SitesInfoForDecoding decodeSitesExtendedInfo(final VariantContextBuilder builder) throws IOException {
    final Object qual = decoder.decodeSingleValue(BCF2Type.FLOAT);
    if ( qual != null ) {
        builder.log10PError(((Double)qual) / -10.0);
    }

    final int nAlleleInfo = decoder.decodeInt(BCF2Type.INT32);
    final int nFormatSamples = decoder.decodeInt(BCF2Type.INT32);
    final int nAlleles = nAlleleInfo >>> 16;           // high 16 bits, unsigned
    final int nInfo = nAlleleInfo & 0x0000FFFF;        // low 16 bits
    final int nFormatFields = nFormatSamples >>> 24;   // high 8 bits, unsigned
    final int nSamples = nFormatSamples & 0x00FFFFFF;  // low 24 bits per spec

    if ( header.getNGenotypeSamples() != nSamples )
        error("Reading BCF2 files with different numbers of samples per record " + "is not currently supported. Saw " + header.getNGenotypeSamples() + " samples in header but have a record with " + nSamples + " samples");

    decodeID(builder);
    final List<Allele> alleles = decodeAlleles(builder, pos, nAlleles);
    decodeFilter(builder);
    decodeInfo(builder, nInfo);

    final SitesInfoForDecoding info = new SitesInfoForDecoding(nFormatFields, nSamples, alleles);
    if ( ! info.isValid() )
        error("Sites info is malformed: " + info);
    return info;
}
/**
 * Round-trips a HaplotypeMap through writeAsVcf and checks the resulting VCF:
 * a single synthetic sample, phase-set (PS) anchoring of snps linked on chr1,
 * and allele-frequency values (snp1's AF is 1 - 0.15 because its alleles are
 * swapped w.r.t. the reference).
 *
 * NOTE(review): the VCFFileReader/iterator are never closed here — consider
 * try-with-resources.
 */
@Test(dataProvider = "haplotypeMapForWriting")
public void testHaplotypeMapWriteToVcf(final HaplotypeMap haplotypeMap) throws Exception {
    final File temp = File.createTempFile("haplotypeMap", ".vcf");
    temp.deleteOnExit();
    haplotypeMap.writeAsVcf(temp, TEST_FASTA);

    final VCFFileReader reader = new VCFFileReader(temp);
    Assert.assertEquals(reader.getFileHeader().getNGenotypeSamples(), 1, "VCF should have exactly one sample");
    Assert.assertEquals(reader.getFileHeader().getSampleNamesInOrder().get(0), HaplotypeMap.HET_GENOTYPE_FOR_PHASING, "VCF sole sample should be " + HaplotypeMap.HET_GENOTYPE_FOR_PHASING);

    final Iterator<VariantContext> iter = reader.iterator();
    final VariantContext first = iter.next();
    Assert.assertEquals(first.getContig(), "chr1", "Wrong chromosome on first snp: " + first);
    Assert.assertEquals(first.getID(), "snp1", "Wrong name on first snp: " + first);
    Assert.assertEquals(first.getGenotype(0).getExtendedAttribute(VCFConstants.PHASE_SET_KEY), Integer.toString(first.getStart()), "anchor snp should have PS equal to its position " + first);
    Assert.assertEquals(first.getAttributeAsDouble(VCFConstants.ALLELE_FREQUENCY_KEY, 0D), 1 - 0.15); // because it's swapped w.r.t the reference

    final VariantContext second = iter.next();
    Assert.assertEquals(second.getContig(), "chr1", "Wrong chromosome on second snp: " + second);
    Assert.assertEquals(second.getID(), "snp2", "Wrong name on second snp: " + second);
    // second snp inherits the first snp's phase set (same anchor position)
    Assert.assertEquals(second.getGenotype(0).getExtendedAttribute(VCFConstants.PHASE_SET_KEY), Integer.toString(first.getStart()), "Phase set is incorrect on second snp: " + second);
    Assert.assertEquals(second.getAttributeAsDouble(VCFConstants.ALLELE_FREQUENCY_KEY, 0D), 0.16);

    final VariantContext third = iter.next();
    Assert.assertEquals(third.getContig(), "chr2", "Wrong chromosome on third snp: " + third);
    Assert.assertEquals(third.getID(), "snp3", "Wrong name on third snp: " + third);
    // third snp is unlinked, so no phase set may be present
    Assert.assertFalse
    (third.getGenotype(0).hasAnyAttribute(VCFConstants.PHASE_SET_KEY), "Third snp should not have a phaseset" + third);
    Assert.assertEquals(third.getAttributeAsDouble(VCFConstants.ALLELE_FREQUENCY_KEY, 0D), 0.2);
}
/** * Create the lazy loader for the genotypes data, and store it in the builder * so that the VC will be able to decode on demand the genotypes data * * @param siteInfo * @param builder */ private void createLazyGenotypesDecoder( final SitesInfoForDecoding siteInfo, final VariantContextBuilder builder ) { if (siteInfo.nSamples > 0) { final LazyGenotypesContext.LazyParser lazyParser = new BCF2LazyGenotypesDecoder(this, siteInfo.alleles, siteInfo.nSamples, siteInfo.nFormatFields, builders); final LazyData lazyData = new LazyData(header, siteInfo.nFormatFields, decoder.getRecordBytes()); final LazyGenotypesContext lazy = new LazyGenotypesContext(lazyParser, lazyData, header.getNGenotypeSamples()); // did we resort the sample names? If so, we need to load the genotype data if ( !header.samplesWereAlreadySorted() ) lazy.decode(); builder.genotypesNoValidation(lazy); } }
/**
 * Decode the site-level (non-genotype) data for the next record from this
 * codec's decoder and load it into the builder.
 *
 * BCF2 packed-field layout: the first INT32 carries n_allele (high 16 bits)
 * and n_info (low 16 bits); the second carries n_fmt (high 8 bits) and
 * n_sample (low 24 bits).
 *
 * Fixes: the n_sample mask was 0x00FFFFF — 20 bits instead of the spec's 24,
 * truncating sample counts >= 2^20; the high-field shifts are now unsigned
 * so a set sign bit cannot yield negative counts.
 *
 * @param builder builder receiving the decoded site fields
 * @return the sites info needed later to decode the genotype block
 * @throws IOException if the underlying decoder fails
 */
private final SitesInfoForDecoding decodeSitesExtendedInfo(final VariantContextBuilder builder) throws IOException {
    final Object qual = decoder.decodeSingleValue(BCF2Type.FLOAT);
    if ( qual != null ) {
        builder.log10PError(((Double)qual) / -10.0);
    }

    final int nAlleleInfo = decoder.decodeInt(BCF2Type.INT32);
    final int nFormatSamples = decoder.decodeInt(BCF2Type.INT32);
    final int nAlleles = nAlleleInfo >>> 16;           // high 16 bits, unsigned
    final int nInfo = nAlleleInfo & 0x0000FFFF;        // low 16 bits
    final int nFormatFields = nFormatSamples >>> 24;   // high 8 bits, unsigned
    final int nSamples = nFormatSamples & 0x00FFFFFF;  // low 24 bits per spec

    if ( header.getNGenotypeSamples() != nSamples )
        error("Reading BCF2 files with different numbers of samples per record " + "is not currently supported. Saw " + header.getNGenotypeSamples() + " samples in header but have a record with " + nSamples + " samples");

    decodeID(builder);
    final List<Allele> alleles = decodeAlleles(builder, pos, nAlleles);
    decodeFilter(builder);
    decodeInfo(builder, nInfo);

    final SitesInfoForDecoding info = new SitesInfoForDecoding(nFormatFields, nSamples, alleles);
    if ( ! info.isValid() )
        error("Sites info is malformed: " + info);
    return info;
}
/**
 * Create the lazy loader for the genotypes data and store it in the builder,
 * so the VariantContext can decode the genotypes data on demand.
 *
 * If the header's samples were not already sorted, the lazy context is decoded
 * immediately so the genotypes are materialized in sorted order.
 *
 * @param siteInfo decoded site-level info (sample count, format fields, alleles)
 * @param builder  builder for the VariantContext under construction
 */
private void createLazyGenotypesDecoder( final SitesInfoForDecoding siteInfo, final VariantContextBuilder builder ) {
    if (siteInfo.nSamples > 0) {
        final LazyGenotypesContext.LazyParser lazyParser = new BCF2LazyGenotypesDecoder(this, siteInfo.alleles, siteInfo.nSamples, siteInfo.nFormatFields, builders);
        // raw record bytes are kept until a caller first touches the genotypes
        final LazyData lazyData = new LazyData(header, siteInfo.nFormatFields, decoder.getRecordBytes());
        final LazyGenotypesContext lazy = new LazyGenotypesContext(lazyParser, lazyData, header.getNGenotypeSamples());

        // did we resort the sample names? If so, we need to load the genotype data
        if ( !header.samplesWereAlreadySorted() )
            lazy.decode();

        builder.genotypesNoValidation(lazy);
    }
}
/** * Create the lazy loader for the genotypes data, and store it in the builder * so that the VC will be able to decode on demand the genotypes data * * @param siteInfo * @param builder */ private void createLazyGenotypesDecoder( final SitesInfoForDecoding siteInfo, final VariantContextBuilder builder ) { if (siteInfo.nSamples > 0) { final LazyGenotypesContext.LazyParser lazyParser = new BCF2LazyGenotypesDecoder(this, siteInfo.alleles, siteInfo.nSamples, siteInfo.nFormatFields, builders); final LazyData lazyData = new LazyData(header, siteInfo.nFormatFields, decoder.getRecordBytes()); final LazyGenotypesContext lazy = new LazyGenotypesContext(lazyParser, lazyData, header.getNGenotypeSamples()); // did we resort the sample names? If so, we need to load the genotype data if ( !header.samplesWereAlreadySorted() ) lazy.decode(); builder.genotypesNoValidation(lazy); } }
private byte[] buildSitesData( VariantContext vc ) throws IOException { final int contigIndex = contigDictionary.get(vc.getContig()); if ( contigIndex == -1 ) throw new IllegalStateException(String.format("Contig %s not found in sequence dictionary from reference", vc.getContig())); // note use of encodeRawValue to not insert the typing byte encoder.encodeRawValue(contigIndex, BCF2Type.INT32); // pos. GATK is 1 based, BCF2 is 0 based encoder.encodeRawValue(vc.getStart() - 1, BCF2Type.INT32); // ref length. GATK is closed, but BCF2 is open so the ref length is GATK end - GATK start + 1 // for example, a SNP is in GATK at 1:10-10, which has ref length 10 - 10 + 1 = 1 encoder.encodeRawValue(vc.getEnd() - vc.getStart() + 1, BCF2Type.INT32); // qual if ( vc.hasLog10PError() ) encoder.encodeRawFloat((float) vc.getPhredScaledQual()); else encoder.encodeRawMissingValue(BCF2Type.FLOAT); // info fields final int nAlleles = vc.getNAlleles(); final int nInfo = vc.getAttributes().size(); final int nGenotypeFormatFields = getNGenotypeFormatFields(vc); final int nSamples = header.getNGenotypeSamples(); encoder.encodeRawInt((nAlleles << 16) | (nInfo & 0x0000FFFF), BCF2Type.INT32); encoder.encodeRawInt((nGenotypeFormatFields << 24) | (nSamples & 0x00FFFFF), BCF2Type.INT32); buildID(vc); buildAlleles(vc); buildFilter(vc); buildInfo(vc); return encoder.getRecordBytes(); }
private byte[] buildSitesData( VariantContext vc ) throws IOException { final int contigIndex = contigDictionary.get(vc.getChr()); if ( contigIndex == -1 ) throw new IllegalStateException(String.format("Contig %s not found in sequence dictionary from reference", vc.getChr())); // note use of encodeRawValue to not insert the typing byte encoder.encodeRawValue(contigIndex, BCF2Type.INT32); // pos. GATK is 1 based, BCF2 is 0 based encoder.encodeRawValue(vc.getStart() - 1, BCF2Type.INT32); // ref length. GATK is closed, but BCF2 is open so the ref length is GATK end - GATK start + 1 // for example, a SNP is in GATK at 1:10-10, which has ref length 10 - 10 + 1 = 1 encoder.encodeRawValue(vc.getEnd() - vc.getStart() + 1, BCF2Type.INT32); // qual if ( vc.hasLog10PError() ) encoder.encodeRawFloat((float) vc.getPhredScaledQual()); else encoder.encodeRawMissingValue(BCF2Type.FLOAT); // info fields final int nAlleles = vc.getNAlleles(); final int nInfo = vc.getAttributes().size(); final int nGenotypeFormatFields = getNGenotypeFormatFields(vc); final int nSamples = header.getNGenotypeSamples(); encoder.encodeRawInt((nAlleles << 16) | (nInfo & 0x0000FFFF), BCF2Type.INT32); encoder.encodeRawInt((nGenotypeFormatFields << 24) | (nSamples & 0x00FFFFF), BCF2Type.INT32); buildID(vc); buildAlleles(vc); buildFilter(vc); buildInfo(vc); return encoder.getRecordBytes(); }
private byte[] buildSitesData( VariantContext vc ) throws IOException { final int contigIndex = contigDictionary.get(vc.getContig()); if ( contigIndex == -1 ) throw new IllegalStateException(String.format("Contig %s not found in sequence dictionary from reference", vc.getContig())); // note use of encodeRawValue to not insert the typing byte encoder.encodeRawValue(contigIndex, BCF2Type.INT32); // pos. GATK is 1 based, BCF2 is 0 based encoder.encodeRawValue(vc.getStart() - 1, BCF2Type.INT32); // ref length. GATK is closed, but BCF2 is open so the ref length is GATK end - GATK start + 1 // for example, a SNP is in GATK at 1:10-10, which has ref length 10 - 10 + 1 = 1 encoder.encodeRawValue(vc.getEnd() - vc.getStart() + 1, BCF2Type.INT32); // qual if ( vc.hasLog10PError() ) encoder.encodeRawFloat((float) vc.getPhredScaledQual()); else encoder.encodeRawMissingValue(BCF2Type.FLOAT); // info fields final int nAlleles = vc.getNAlleles(); final int nInfo = vc.getAttributes().size(); final int nGenotypeFormatFields = getNGenotypeFormatFields(vc); final int nSamples = header.getNGenotypeSamples(); encoder.encodeRawInt((nAlleles << 16) | (nInfo & 0x0000FFFF), BCF2Type.INT32); encoder.encodeRawInt((nGenotypeFormatFields << 24) | (nSamples & 0x00FFFFF), BCF2Type.INT32); buildID(vc); buildAlleles(vc); buildFilter(vc); buildInfo(vc); return encoder.getRecordBytes(); }