/**
 * Checks whether this header declares any genotype sample columns.
 *
 * @return true if at least one genotype sample is present, false otherwise
 */
public boolean hasGenotypingData() {
    final int sampleCount = getNGenotypeSamples();
    return sampleCount > 0;
}
/**
 * Do we have genotyping data?
 *
 * <p>Delegates to {@link #getNGenotypeSamples()}; any positive sample count
 * means genotype (FORMAT) columns are present in this header.
 *
 * @return true if we have genotyping columns, false otherwise
 */
public boolean hasGenotypingData() { return getNGenotypeSamples() > 0; }
/**
 * Reports whether this header carries genotype columns.
 *
 * @return true if we have genotyping columns, false otherwise
 */
public boolean hasGenotypingData() {
    return 0 < getNGenotypeSamples();
}
// NOTE(review): this snippet is truncated — the PicardException message
// concatenation continues past the visible span, so only comments are added here.
// Reads the VCF header from variantPath and enforces that the input contains
// exactly one genotype sample, failing fast with a PicardException otherwise.
final VCFHeader header = getHeaderFromPath(variantPath);
final int numberOfSamples = header.getNGenotypeSamples();
if (numberOfSamples != 1) {
    throw new PicardException("Input: " + variantPathString + " was expected to contain a single sample" +
/**
 * Installs the VCF header used for decoding and (re)builds all per-sample
 * state: the genotype field decoders, the string dictionary, and one
 * GenotypeBuilder per sample in header order.
 */
@Override
public void setHeader(VCFHeader header) {
    genoFieldDecoders = new BCF2GenotypeFieldDecoders(header);
    fieldDict = BCF2Utils.makeDictionary(header);
    sampleNamesInOrder = header.getSampleNamesInOrder();
    sampleNameToOffset = header.getSampleNameToOffset();

    final List<String> sampleNames = header.getGenotypeSamples();
    builders = new GenotypeBuilder[header.getNGenotypeSamples()];
    for (int idx = 0; idx < builders.length; idx++) {
        builders[idx] = new GenotypeBuilder(sampleNames.get(idx));
    }
}
/**
 * Builds a split guesser over the given seekable BCF stream.
 *
 * Detects BGZF compression, then parses the VCF header from the (possibly
 * decompressed) stream, caching the contig dictionary size and genotype
 * sample count for later use when guessing record boundaries.
 *
 * NOTE(review): the headerStream parameter is never read — the header is
 * parsed from ss instead. Confirm whether headerStream should be used here.
 *
 * @throws IOException if the stream cannot be read or the header is malformed
 */
public BCFSplitGuesser(SeekableStream ss, InputStream headerStream) throws IOException {
    inFile = ss;
    InputStream bInFile = new BufferedInputStream(inFile);
    bgzf = BlockCompressedInputStream.isValidFile(bInFile);
    if (bgzf)
        bInFile = new BlockCompressedInputStream(bInFile);
    // Excess buffering here but it can't be helped that BCF2Codec only takes
    // PositionalBufferedStream.
    final VCFHeader header = (VCFHeader)bcfCodec.readHeader(
        new PositionalBufferedStream(bInFile)).getHeaderValue();
    contigDictionaryLength = header.getContigLines().size();
    genotypeSampleCount = header.getNGenotypeSamples();
}
/**
 * Construct a split guesser for a BCF input: detect BGZF framing, read the
 * header, and cache the contig-dictionary size and genotype sample count.
 *
 * @throws IOException if reading or header parsing fails
 */
public BCFSplitGuesser(SeekableStream ss, InputStream headerStream) throws IOException {
    inFile = ss;

    InputStream buffered = new BufferedInputStream(inFile);
    bgzf = BlockCompressedInputStream.isValidFile(buffered);
    if (bgzf) {
        buffered = new BlockCompressedInputStream(buffered);
    }

    // BCF2Codec accepts only PositionalBufferedStream, hence the double buffering.
    final PositionalBufferedStream positional = new PositionalBufferedStream(buffered);
    final VCFHeader vcfHeader = (VCFHeader) bcfCodec.readHeader(positional).getHeaderValue();

    contigDictionaryLength = vcfHeader.getContigLines().size();
    genotypeSampleCount = vcfHeader.getNGenotypeSamples();
}
/**
 * Creates the guesser from a seekable BCF stream: probes for BGZF framing,
 * decodes the VCF header, and records contig-line count plus genotype sample
 * count needed for split guessing.
 *
 * NOTE(review): headerStream is accepted but unused here — header data is read
 * from ss; verify against callers whether that is intentional.
 *
 * @throws IOException on read or header-parse failure
 */
public BCFSplitGuesser(SeekableStream ss, InputStream headerStream) throws IOException {
    inFile = ss;
    InputStream bInFile = new BufferedInputStream(inFile);
    bgzf = BlockCompressedInputStream.isValidFile(bInFile);
    if (bgzf)
        bInFile = new BlockCompressedInputStream(bInFile);
    // Excess buffering here but it can't be helped that BCF2Codec only takes
    // PositionalBufferedStream.
    final VCFHeader header = (VCFHeader)bcfCodec.readHeader(
        new PositionalBufferedStream(bInFile)).getHeaderValue();
    contigDictionaryLength = header.getContigLines().size();
    genotypeSampleCount = header.getNGenotypeSamples();
}
/**
 * Installs the header used for BCF decoding and rebuilds per-sample state:
 * genotype field decoders, the string dictionary, one GenotypeBuilder per
 * sample (in header order), plus cached sorted sample names and offsets.
 */
@Override
public void setHeader(VCFHeader header) {
    genoFieldDecoders = new BCF2GenotypeFieldDecoders(header);
    fieldDict = BCF2Utils.makeDictionary(header);
    builders = new GenotypeBuilder[header.getNGenotypeSamples()];
    final List<String> genotypeSamples = header.getGenotypeSamples();
    // one reusable builder per sample, indexed in header order
    for (int i = 0; i < builders.length; ++i)
        builders[i] = new GenotypeBuilder(genotypeSamples.get(i));
    sampleNamesInOrder = header.getSampleNamesInOrder();
    sampleNameToOffset = header.getSampleNameToOffset();
}
/**
 * Replaces the decoding header and regenerates every piece of
 * sample-derived state from it.
 */
@Override
public void setHeader(VCFHeader header) {
    genoFieldDecoders = new BCF2GenotypeFieldDecoders(header);
    fieldDict = BCF2Utils.makeDictionary(header);

    final List<String> sampleNames = header.getGenotypeSamples();
    final GenotypeBuilder[] freshBuilders = new GenotypeBuilder[header.getNGenotypeSamples()];
    for (int sampleIndex = 0; sampleIndex < freshBuilders.length; sampleIndex++) {
        freshBuilders[sampleIndex] = new GenotypeBuilder(sampleNames.get(sampleIndex));
    }
    builders = freshBuilders;

    sampleNamesInOrder = header.getSampleNamesInOrder();
    sampleNameToOffset = header.getSampleNameToOffset();
}
/**
 * Verifies single-sample sample-name remapping in VCFCodec: the remapped name
 * must appear both in the parsed header and in each of the first ten decoded
 * records' genotypes.
 *
 * Fix: the FileInputStream/iterator were previously leaked; the iterator is a
 * CloseableIterator, so try-with-resources now closes it (and the stream).
 */
@Test
public void testVCFHeaderSampleRenamingSingleSampleVCF() throws Exception {
    final VCFCodec codec = new VCFCodec();
    codec.setRemappedSampleName("FOOSAMPLE");
    try (final AsciiLineReaderIterator vcfIterator =
             new AsciiLineReaderIterator(AsciiLineReader.from(new FileInputStream(variantTestDataRoot + "HiSeq.10000.vcf")))) {
        final VCFHeader header = (VCFHeader) codec.readHeader(vcfIterator).getHeaderValue();

        Assert.assertEquals(header.getNGenotypeSamples(), 1, "Wrong number of samples in remapped header");
        Assert.assertEquals(header.getGenotypeSamples().get(0), "FOOSAMPLE", "Sample name in remapped header has incorrect value");

        int recordCount = 0;
        while (vcfIterator.hasNext() && recordCount < 10) {
            recordCount++;
            final VariantContext vcfRecord = codec.decode(vcfIterator.next());
            Assert.assertEquals(vcfRecord.getSampleNames().size(), 1, "Wrong number of samples in vcf record after remapping");
            Assert.assertEquals(vcfRecord.getSampleNames().iterator().next(), "FOOSAMPLE", "Wrong sample in vcf record after remapping");
        }
    }
}
/**
 * Decode the site-level (non-genotype) data for the next record from this
 * codec's decoder and load it into the builder.
 *
 * The two packed INT32s follow the BCF2 layout: the first packs n_allele in
 * the high 16 bits and n_info in the low 16 bits; the second packs n_fmt in
 * the high 8 bits and n_sample in the low 24 bits.
 *
 * Fixes: the n_sample mask was 0x00FFFFF (only 20 bits, a dropped hex digit),
 * silently truncating sample counts >= 2^20; shifts on the packed high fields
 * are now unsigned so a set sign bit cannot produce negative counts.
 *
 * @param builder builder receiving the decoded site fields
 * @return the sites info needed later to decode the genotype block
 * @throws IOException if the underlying decoder fails
 */
private final SitesInfoForDecoding decodeSitesExtendedInfo(final VariantContextBuilder builder) throws IOException {
    final Object qual = decoder.decodeSingleValue(BCF2Type.FLOAT);
    if ( qual != null ) {
        builder.log10PError(((Double)qual) / -10.0);
    }

    final int nAlleleInfo = decoder.decodeInt(BCF2Type.INT32);
    final int nFormatSamples = decoder.decodeInt(BCF2Type.INT32);
    final int nAlleles = nAlleleInfo >>> 16;           // high 16 bits, unsigned
    final int nInfo = nAlleleInfo & 0x0000FFFF;        // low 16 bits
    final int nFormatFields = nFormatSamples >>> 24;   // high 8 bits, unsigned
    final int nSamples = nFormatSamples & 0x00FFFFFF;  // low 24 bits per spec

    if ( header.getNGenotypeSamples() != nSamples )
        error("Reading BCF2 files with different numbers of samples per record " + "is not currently supported. Saw " + header.getNGenotypeSamples() + " samples in header but have a record with " + nSamples + " samples");

    decodeID(builder);
    final List<Allele> alleles = decodeAlleles(builder, pos, nAlleles);
    decodeFilter(builder);
    decodeInfo(builder, nInfo);

    final SitesInfoForDecoding info = new SitesInfoForDecoding(nFormatFields, nSamples, alleles);
    if ( ! info.isValid() )
        error("Sites info is malformed: " + info);
    return info;
}
/**
 * Round-trips a HaplotypeMap through writeAsVcf and checks the resulting VCF:
 * a single synthetic sample, phase-set (PS) anchoring of snps linked on chr1,
 * and allele-frequency values (snp1's AF is 1 - 0.15 because its alleles are
 * swapped w.r.t. the reference).
 *
 * NOTE(review): the VCFFileReader/iterator are never closed here — consider
 * try-with-resources.
 */
@Test(dataProvider = "haplotypeMapForWriting")
public void testHaplotypeMapWriteToVcf(final HaplotypeMap haplotypeMap) throws Exception {
    final File temp = File.createTempFile("haplotypeMap", ".vcf");
    temp.deleteOnExit();
    haplotypeMap.writeAsVcf(temp, TEST_FASTA);

    final VCFFileReader reader = new VCFFileReader(temp);
    Assert.assertEquals(reader.getFileHeader().getNGenotypeSamples(), 1, "VCF should have exactly one sample");
    Assert.assertEquals(reader.getFileHeader().getSampleNamesInOrder().get(0), HaplotypeMap.HET_GENOTYPE_FOR_PHASING, "VCF sole sample should be " + HaplotypeMap.HET_GENOTYPE_FOR_PHASING);

    final Iterator<VariantContext> iter = reader.iterator();
    final VariantContext first = iter.next();
    Assert.assertEquals(first.getContig(), "chr1", "Wrong chromosome on first snp: " + first);
    Assert.assertEquals(first.getID(), "snp1", "Wrong name on first snp: " + first);
    Assert.assertEquals(first.getGenotype(0).getExtendedAttribute(VCFConstants.PHASE_SET_KEY), Integer.toString(first.getStart()), "anchor snp should have PS equal to its position " + first);
    Assert.assertEquals(first.getAttributeAsDouble(VCFConstants.ALLELE_FREQUENCY_KEY, 0D), 1 - 0.15); // because it's swapped w.r.t the reference

    final VariantContext second = iter.next();
    Assert.assertEquals(second.getContig(), "chr1", "Wrong chromosome on second snp: " + second);
    Assert.assertEquals(second.getID(), "snp2", "Wrong name on second snp: " + second);
    // second snp inherits the first snp's phase set (same anchor position)
    Assert.assertEquals(second.getGenotype(0).getExtendedAttribute(VCFConstants.PHASE_SET_KEY), Integer.toString(first.getStart()), "Phase set is incorrect on second snp: " + second);
    Assert.assertEquals(second.getAttributeAsDouble(VCFConstants.ALLELE_FREQUENCY_KEY, 0D), 0.16);

    final VariantContext third = iter.next();
    Assert.assertEquals(third.getContig(), "chr2", "Wrong chromosome on third snp: " + third);
    Assert.assertEquals(third.getID(), "snp3", "Wrong name on third snp: " + third);
    // third snp is unlinked, so no phase set may be present
    Assert.assertFalse
    (third.getGenotype(0).hasAnyAttribute(VCFConstants.PHASE_SET_KEY), "Third snp should not have a phaseset" + third);
    Assert.assertEquals(third.getAttributeAsDouble(VCFConstants.ALLELE_FREQUENCY_KEY, 0D), 0.2);
}
/** * Create the lazy loader for the genotypes data, and store it in the builder * so that the VC will be able to decode on demand the genotypes data * * @param siteInfo * @param builder */ private void createLazyGenotypesDecoder( final SitesInfoForDecoding siteInfo, final VariantContextBuilder builder ) { if (siteInfo.nSamples > 0) { final LazyGenotypesContext.LazyParser lazyParser = new BCF2LazyGenotypesDecoder(this, siteInfo.alleles, siteInfo.nSamples, siteInfo.nFormatFields, builders); final LazyData lazyData = new LazyData(header, siteInfo.nFormatFields, decoder.getRecordBytes()); final LazyGenotypesContext lazy = new LazyGenotypesContext(lazyParser, lazyData, header.getNGenotypeSamples()); // did we resort the sample names? If so, we need to load the genotype data if ( !header.samplesWereAlreadySorted() ) lazy.decode(); builder.genotypesNoValidation(lazy); } }
/**
 * Decode the site-level (non-genotype) data for the next record from this
 * codec's decoder and load it into the builder.
 *
 * BCF2 packed-field layout: the first INT32 carries n_allele (high 16 bits)
 * and n_info (low 16 bits); the second carries n_fmt (high 8 bits) and
 * n_sample (low 24 bits).
 *
 * Fixes: the n_sample mask was 0x00FFFFF — 20 bits instead of the spec's 24,
 * truncating sample counts >= 2^20; the high-field shifts are now unsigned
 * so a set sign bit cannot yield negative counts.
 *
 * @param builder builder receiving the decoded site fields
 * @return the sites info needed later to decode the genotype block
 * @throws IOException if the underlying decoder fails
 */
private final SitesInfoForDecoding decodeSitesExtendedInfo(final VariantContextBuilder builder) throws IOException {
    final Object qual = decoder.decodeSingleValue(BCF2Type.FLOAT);
    if ( qual != null ) {
        builder.log10PError(((Double)qual) / -10.0);
    }

    final int nAlleleInfo = decoder.decodeInt(BCF2Type.INT32);
    final int nFormatSamples = decoder.decodeInt(BCF2Type.INT32);
    final int nAlleles = nAlleleInfo >>> 16;           // high 16 bits, unsigned
    final int nInfo = nAlleleInfo & 0x0000FFFF;        // low 16 bits
    final int nFormatFields = nFormatSamples >>> 24;   // high 8 bits, unsigned
    final int nSamples = nFormatSamples & 0x00FFFFFF;  // low 24 bits per spec

    if ( header.getNGenotypeSamples() != nSamples )
        error("Reading BCF2 files with different numbers of samples per record " + "is not currently supported. Saw " + header.getNGenotypeSamples() + " samples in header but have a record with " + nSamples + " samples");

    decodeID(builder);
    final List<Allele> alleles = decodeAlleles(builder, pos, nAlleles);
    decodeFilter(builder);
    decodeInfo(builder, nInfo);

    final SitesInfoForDecoding info = new SitesInfoForDecoding(nFormatFields, nSamples, alleles);
    if ( ! info.isValid() )
        error("Sites info is malformed: " + info);
    return info;
}
/**
 * Create the lazy loader for the genotypes data and store it in the builder,
 * so the VariantContext can decode the genotypes data on demand.
 *
 * If the header's samples were not already sorted, the lazy context is decoded
 * immediately so the genotypes are materialized in sorted order.
 *
 * @param siteInfo decoded site-level info (sample count, format fields, alleles)
 * @param builder  builder for the VariantContext under construction
 */
private void createLazyGenotypesDecoder( final SitesInfoForDecoding siteInfo, final VariantContextBuilder builder ) {
    if (siteInfo.nSamples > 0) {
        final LazyGenotypesContext.LazyParser lazyParser = new BCF2LazyGenotypesDecoder(this, siteInfo.alleles, siteInfo.nSamples, siteInfo.nFormatFields, builders);
        // raw record bytes are kept until a caller first touches the genotypes
        final LazyData lazyData = new LazyData(header, siteInfo.nFormatFields, decoder.getRecordBytes());
        final LazyGenotypesContext lazy = new LazyGenotypesContext(lazyParser, lazyData, header.getNGenotypeSamples());

        // did we resort the sample names? If so, we need to load the genotype data
        if ( !header.samplesWereAlreadySorted() )
            lazy.decode();

        builder.genotypesNoValidation(lazy);
    }
}
/** * Create the lazy loader for the genotypes data, and store it in the builder * so that the VC will be able to decode on demand the genotypes data * * @param siteInfo * @param builder */ private void createLazyGenotypesDecoder( final SitesInfoForDecoding siteInfo, final VariantContextBuilder builder ) { if (siteInfo.nSamples > 0) { final LazyGenotypesContext.LazyParser lazyParser = new BCF2LazyGenotypesDecoder(this, siteInfo.alleles, siteInfo.nSamples, siteInfo.nFormatFields, builders); final LazyData lazyData = new LazyData(header, siteInfo.nFormatFields, decoder.getRecordBytes()); final LazyGenotypesContext lazy = new LazyGenotypesContext(lazyParser, lazyData, header.getNGenotypeSamples()); // did we resort the sample names? If so, we need to load the genotype data if ( !header.samplesWereAlreadySorted() ) lazy.decode(); builder.genotypesNoValidation(lazy); } }
private byte[] buildSitesData( VariantContext vc ) throws IOException { final int contigIndex = contigDictionary.get(vc.getContig()); if ( contigIndex == -1 ) throw new IllegalStateException(String.format("Contig %s not found in sequence dictionary from reference", vc.getContig())); // note use of encodeRawValue to not insert the typing byte encoder.encodeRawValue(contigIndex, BCF2Type.INT32); // pos. GATK is 1 based, BCF2 is 0 based encoder.encodeRawValue(vc.getStart() - 1, BCF2Type.INT32); // ref length. GATK is closed, but BCF2 is open so the ref length is GATK end - GATK start + 1 // for example, a SNP is in GATK at 1:10-10, which has ref length 10 - 10 + 1 = 1 encoder.encodeRawValue(vc.getEnd() - vc.getStart() + 1, BCF2Type.INT32); // qual if ( vc.hasLog10PError() ) encoder.encodeRawFloat((float) vc.getPhredScaledQual()); else encoder.encodeRawMissingValue(BCF2Type.FLOAT); // info fields final int nAlleles = vc.getNAlleles(); final int nInfo = vc.getAttributes().size(); final int nGenotypeFormatFields = getNGenotypeFormatFields(vc); final int nSamples = header.getNGenotypeSamples(); encoder.encodeRawInt((nAlleles << 16) | (nInfo & 0x0000FFFF), BCF2Type.INT32); encoder.encodeRawInt((nGenotypeFormatFields << 24) | (nSamples & 0x00FFFFF), BCF2Type.INT32); buildID(vc); buildAlleles(vc); buildFilter(vc); buildInfo(vc); return encoder.getRecordBytes(); }
private byte[] buildSitesData( VariantContext vc ) throws IOException { final int contigIndex = contigDictionary.get(vc.getChr()); if ( contigIndex == -1 ) throw new IllegalStateException(String.format("Contig %s not found in sequence dictionary from reference", vc.getChr())); // note use of encodeRawValue to not insert the typing byte encoder.encodeRawValue(contigIndex, BCF2Type.INT32); // pos. GATK is 1 based, BCF2 is 0 based encoder.encodeRawValue(vc.getStart() - 1, BCF2Type.INT32); // ref length. GATK is closed, but BCF2 is open so the ref length is GATK end - GATK start + 1 // for example, a SNP is in GATK at 1:10-10, which has ref length 10 - 10 + 1 = 1 encoder.encodeRawValue(vc.getEnd() - vc.getStart() + 1, BCF2Type.INT32); // qual if ( vc.hasLog10PError() ) encoder.encodeRawFloat((float) vc.getPhredScaledQual()); else encoder.encodeRawMissingValue(BCF2Type.FLOAT); // info fields final int nAlleles = vc.getNAlleles(); final int nInfo = vc.getAttributes().size(); final int nGenotypeFormatFields = getNGenotypeFormatFields(vc); final int nSamples = header.getNGenotypeSamples(); encoder.encodeRawInt((nAlleles << 16) | (nInfo & 0x0000FFFF), BCF2Type.INT32); encoder.encodeRawInt((nGenotypeFormatFields << 24) | (nSamples & 0x00FFFFF), BCF2Type.INT32); buildID(vc); buildAlleles(vc); buildFilter(vc); buildInfo(vc); return encoder.getRecordBytes(); }
private byte[] buildSitesData( VariantContext vc ) throws IOException { final int contigIndex = contigDictionary.get(vc.getContig()); if ( contigIndex == -1 ) throw new IllegalStateException(String.format("Contig %s not found in sequence dictionary from reference", vc.getContig())); // note use of encodeRawValue to not insert the typing byte encoder.encodeRawValue(contigIndex, BCF2Type.INT32); // pos. GATK is 1 based, BCF2 is 0 based encoder.encodeRawValue(vc.getStart() - 1, BCF2Type.INT32); // ref length. GATK is closed, but BCF2 is open so the ref length is GATK end - GATK start + 1 // for example, a SNP is in GATK at 1:10-10, which has ref length 10 - 10 + 1 = 1 encoder.encodeRawValue(vc.getEnd() - vc.getStart() + 1, BCF2Type.INT32); // qual if ( vc.hasLog10PError() ) encoder.encodeRawFloat((float) vc.getPhredScaledQual()); else encoder.encodeRawMissingValue(BCF2Type.FLOAT); // info fields final int nAlleles = vc.getNAlleles(); final int nInfo = vc.getAttributes().size(); final int nGenotypeFormatFields = getNGenotypeFormatFields(vc); final int nSamples = header.getNGenotypeSamples(); encoder.encodeRawInt((nAlleles << 16) | (nInfo & 0x0000FFFF), BCF2Type.INT32); encoder.encodeRawInt((nGenotypeFormatFields << 24) | (nSamples & 0x00FFFFF), BCF2Type.INT32); buildID(vc); buildAlleles(vc); buildFilter(vc); buildInfo(vc); return encoder.getRecordBytes(); }