/**
 * Records the name of the single sample declared in the given VCF header.
 *
 * @param vcfHeader header whose genotype sample list is inspected
 * @throws IllegalArgumentException if the sample list is {@code null} or does not contain
 *         exactly one entry
 */
@Override
public void setup(final VCFHeader vcfHeader) {
    final List<String> headerSamples = vcfHeader.getGenotypeSamples();
    final boolean hasExactlyOneSample = headerSamples != null && headerSamples.size() == 1;
    if (!hasExactlyOneSample) {
        // A missing list is reported as a count of zero.
        final String foundCount = (headerSamples == null) ? "0" : String.valueOf(headerSamples.size());
        throw new IllegalArgumentException("Expected to have exactly 1 sample in a GVCF, found " + foundCount);
    }
    sample = headerSamples.get(0);
}
/**
 * Stores the one and only genotype sample name found in the header.
 *
 * @param vcfHeader header providing the genotype sample names
 * @throws IllegalArgumentException when the header has no sample list or the list size is not 1
 */
@Override
public void setup(final VCFHeader vcfHeader) {
    final List<String> sampleNames = vcfHeader.getGenotypeSamples();
    if (sampleNames != null && sampleNames.size() == 1) {
        sample = sampleNames.get(0);
        return;
    }
    // Reaching this point means the list is absent or has the wrong size; absent counts as "0".
    final String observed = (sampleNames == null) ? "0" : String.valueOf(sampleNames.size());
    throw new IllegalArgumentException("Expected to have exactly 1 sample in a GVCF, found " + observed);
}
/**
 * Resets the run state: sets {@code numConverts}, {@code parseTime}, {@code convertTime}
 * and {@code encodeTime} to 0 and {@code postDone} to {@code false}.
 */
@Override
public void pre() {
    numConverts.set(0);
    parseTime.set(0);
    convertTime.set(0);
    encodeTime.set(0);
    postDone.set(false);
    // The former locals holding the header's genotype sample list and its size were never
    // read, so they have been removed.
}
/**
 * Collects the merged sample names of every header in the map.
 *
 * <p>Each genotype sample name is passed to
 * {@code GATKVariantContextUtils.mergedSampleName} together with the map key of the header
 * it came from. The uniquify flag is {@code true} only when {@code mergeOption} is
 * {@code UNIQUIFY}.
 *
 * @param headers     VCF headers keyed by name
 * @param mergeOption genotype merge type deciding whether names are uniquified
 * @return the merged sample names, held in a {@link TreeSet}
 */
public static Set<String> getSampleList(Map<String, VCFHeader> headers, GATKVariantContextUtils.GenotypeMergeType mergeOption) {
    final boolean uniquify = mergeOption == GATKVariantContextUtils.GenotypeMergeType.UNIQUIFY;
    final Set<String> mergedNames = new TreeSet<String>();

    for (final Map.Entry<String, VCFHeader> entry : headers.entrySet()) {
        final VCFHeader entryHeader = entry.getValue();
        final String entryName = entry.getKey();
        for (final String sampleName : entryHeader.getGenotypeSamples()) {
            mergedNames.add(GATKVariantContextUtils.mergedSampleName(entryName, sampleName, uniquify));
        }
    }
    return mergedNames;
}
public void setup(final VCFHeader vcfHeader) { //Use sampleMetricsMap.get in case a sample isn't ever put in the map (due to being all HomRef for example) vcfHeader.getGenotypeSamples().stream().forEach(sampleName -> sampleMetricsMap.get(sampleName)); }
/**
 * Gets all of the unique sample names from the set of provided VCF rod names input by the user.
 * Names are accumulated in a {@link LinkedHashSet}, so each name is kept once, in the order
 * it is first encountered.
 *
 * @param toolkit  GATK engine
 * @param rodNames list of rods to use; if null, uses all VCF rods
 * @return the set of unique samples
 */
public static Set<String> getUniqueSamplesFromRods(GenomeAnalysisEngine toolkit, Collection<String> rodNames) {
    final Set<String> uniqueSamples = new LinkedHashSet<>();
    for (final VCFHeader rodHeader : GATKVCFUtils.getVCFHeadersFromRods(toolkit, rodNames).values()) {
        uniqueSamples.addAll(rodHeader.getGenotypeSamples());
    }
    return uniqueSamples;
}
public void setup(final VCFHeader vcfHeader) { //Use sampleMetricsMap.get in case a sample isn't ever put in the map (due to being all HomRef for example) vcfHeader.getGenotypeSamples().stream().forEach(sampleName -> sampleMetricsMap.get(sampleName)); }
public ConcordanceMetrics(VCFHeader evaluate, VCFHeader truth, PrintStream inputSitesFile) { HashSet<String> overlappingSamples = new HashSet<>(evaluate.getGenotypeSamples()); overlappingSamples.retainAll(truth.getGenotypeSamples()); perSampleGenotypeConcordance = new HashMap<>(overlappingSamples.size()); for ( String sample : overlappingSamples ) { perSampleGenotypeConcordance.put(sample,new GenotypeConcordanceTable()); } overallGenotypeConcordance = new GenotypeConcordanceTable(); overallSiteConcordance = new SiteConcordanceTable(); sitesFile = inputSitesFile; if (sitesFile != null) printSitesFileHeader(); }
/**
 * Builds the initial {@code ConcordanceMetrics} from the eval and comp VCF headers.
 *
 * <p>The headers are looked up by the names of {@code evalBinding} and {@code compBinding}.
 * Their genotype sample lists are stored in {@code evalSamples} and {@code compSamples}.
 *
 * @return a new metrics object built from both headers and {@code sitesFile}
 */
public ConcordanceMetrics reduceInit() {
    final Map<String, VCFHeader> headersByName =
            GATKVCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList(evalBinding, compBinding));

    final VCFHeader evalHeader = headersByName.get(evalBinding.getName());
    evalSamples = evalHeader.getGenotypeSamples();

    final VCFHeader compHeader = headersByName.get(compBinding.getName());
    compSamples = compHeader.getGenotypeSamples();

    return new ConcordanceMetrics(evalHeader, compHeader, sitesFile);
}
/**
 * Adds or replaces the contig header lines of a VCF header using the reference file and the
 * master reference dictionary.
 *
 * <p>The input header is not modified. A new {@code VCFHeader} is built from the lines
 * returned by {@code withUpdatedContigsAsLines} and the genotype samples of {@code oldHeader}.
 *
 * @param oldHeader     the header to update
 * @param referenceFile the file path to the reference sequence used to generate this vcf
 * @param refDict       the SAM formatted reference sequence dictionary
 * @return a new header carrying the updated contig lines and the original sample names
 */
public static VCFHeader withUpdatedContigs(
        final VCFHeader oldHeader,
        final File referenceFile,
        final SAMSequenceDictionary refDict) {
    return new VCFHeader(
            withUpdatedContigsAsLines(oldHeader.getMetaDataInInputOrder(), referenceFile, refDict),
            oldHeader.getGenotypeSamples());
}
/**
 * Walks over the VCF header and repairs the standard VCF header lines in it, returning a
 * freshly allocated {@link VCFHeader} with standard VCF header lines repaired as necessary.
 *
 * <p>FORMAT lines go through {@code formatStandards.repair} and INFO lines through
 * {@code infoStandards.repair}. Every other line is carried over unchanged. Input order is
 * kept.
 *
 * @param header the header whose lines are examined
 * @return a new header with the processed lines and the same genotype samples
 */
public static VCFHeader repairStandardHeaderLines(final VCFHeader header) {
    final Set<VCFHeaderLine> repairedLines =
            new LinkedHashSet<VCFHeaderLine>(header.getMetaDataInInputOrder().size());

    for (final VCFHeaderLine originalLine : header.getMetaDataInInputOrder()) {
        final VCFHeaderLine repairedLine;
        if (originalLine instanceof VCFFormatHeaderLine) {
            repairedLine = formatStandards.repair((VCFFormatHeaderLine) originalLine);
        } else if (originalLine instanceof VCFInfoHeaderLine) {
            repairedLine = infoStandards.repair((VCFInfoHeaderLine) originalLine);
        } else {
            repairedLine = originalLine;
        }
        repairedLines.add(repairedLine);
    }

    return new VCFHeader(repairedLines, header.getGenotypeSamples());
}
/**
 * Adds or replaces the contig header lines of a VCF header using the reference file and the
 * master reference dictionary.
 *
 * <p>The input header is not modified. A new {@code VCFHeader} is built from the lines
 * returned by {@code withUpdatedContigsAsLines} and the genotype samples of {@code oldHeader}.
 *
 * @param oldHeader     the header to update
 * @param referenceFile the file path to the reference sequence used to generate this vcf
 * @param refDict       the SAM formatted reference sequence dictionary
 * @return a new header carrying the updated contig lines and the original sample names
 */
public static VCFHeader withUpdatedContigs(
        final VCFHeader oldHeader,
        final File referenceFile,
        final SAMSequenceDictionary refDict) {
    return new VCFHeader(
            withUpdatedContigsAsLines(oldHeader.getMetaDataInInputOrder(), referenceFile, refDict),
            oldHeader.getGenotypeSamples());
}
/**
 * Adds or replaces the contig header lines of a VCF header using the reference file and the
 * master reference dictionary.
 *
 * <p>The input header is not modified. A new {@code VCFHeader} is built from the lines
 * returned by {@code withUpdatedContigsAsLines} and the genotype samples of {@code oldHeader}.
 *
 * @param oldHeader     the header to update
 * @param referenceFile the file path to the reference sequence used to generate this vcf
 * @param refDict       the SAM formatted reference sequence dictionary
 * @return a new header carrying the updated contig lines and the original sample names
 */
public static VCFHeader withUpdatedContigs(
        final VCFHeader oldHeader,
        final File referenceFile,
        final SAMSequenceDictionary refDict) {
    return new VCFHeader(
            withUpdatedContigsAsLines(oldHeader.getMetaDataInInputOrder(), referenceFile, refDict),
            oldHeader.getGenotypeSamples());
}
/**
 * Rebuilds all header-derived state from the given VCF header.
 *
 * <p>This covers the genotype field decoders, the field dictionary, one
 * {@code GenotypeBuilder} per genotype sample, and the sample-name ordering and offset
 * lookups.
 *
 * @param header the header to derive the state from
 */
@Override
public void setHeader(VCFHeader header) {
    genoFieldDecoders = new BCF2GenotypeFieldDecoders(header);
    fieldDict = BCF2Utils.makeDictionary(header);

    // One builder per sample, each created with its sample name, in header order.
    builders = new GenotypeBuilder[header.getNGenotypeSamples()];
    final List<String> sampleNames = header.getGenotypeSamples();
    int sampleIndex = 0;
    while (sampleIndex < builders.length) {
        builders[sampleIndex] = new GenotypeBuilder(sampleNames.get(sampleIndex));
        sampleIndex++;
    }

    sampleNamesInOrder = header.getSampleNamesInOrder();
    sampleNameToOffset = header.getSampleNameToOffset();
}
/**
 * Rebuilds all header-derived state from the given VCF header.
 *
 * <p>This covers the genotype field decoders, the field dictionary, one
 * {@code GenotypeBuilder} per genotype sample, and the sample-name ordering and offset
 * lookups.
 *
 * @param header the header to derive the state from
 */
@Override
public void setHeader(VCFHeader header) {
    genoFieldDecoders = new BCF2GenotypeFieldDecoders(header);
    fieldDict = BCF2Utils.makeDictionary(header);

    // One builder per sample, each created with its sample name, in header order.
    builders = new GenotypeBuilder[header.getNGenotypeSamples()];
    final List<String> sampleNames = header.getGenotypeSamples();
    int sampleIndex = 0;
    while (sampleIndex < builders.length) {
        builders[sampleIndex] = new GenotypeBuilder(sampleNames.get(sampleIndex));
        sampleIndex++;
    }

    sampleNamesInOrder = header.getSampleNamesInOrder();
    sampleNameToOffset = header.getSampleNameToOffset();
}
/**
 * Rebuilds all header-derived state from the given VCF header.
 *
 * <p>This covers the genotype field decoders, the field dictionary, one
 * {@code GenotypeBuilder} per genotype sample, and the sample-name ordering and offset
 * lookups.
 *
 * @param header the header to derive the state from
 */
@Override
public void setHeader(VCFHeader header) {
    genoFieldDecoders = new BCF2GenotypeFieldDecoders(header);
    fieldDict = BCF2Utils.makeDictionary(header);

    // One builder per sample, each created with its sample name, in header order.
    builders = new GenotypeBuilder[header.getNGenotypeSamples()];
    final List<String> sampleNames = header.getGenotypeSamples();
    int sampleIndex = 0;
    while (sampleIndex < builders.length) {
        builders[sampleIndex] = new GenotypeBuilder(sampleNames.get(sampleIndex));
        sampleIndex++;
    }

    sampleNamesInOrder = header.getSampleNamesInOrder();
    sampleNameToOffset = header.getSampleNameToOffset();
}
private void verifySampleRenaming( final File outputVCF, final String newSampleName ) throws IOException { final Pair<VCFHeader, VCIterable<LineIterator>> headerAndVCIter = VCIterable.readAllVCs(outputVCF, new VCFCodec()); final VCFHeader header = headerAndVCIter.getFirst(); final VCIterable<LineIterator> iter = headerAndVCIter.getSecond(); // Verify that sample renaming occurred at both the header and record levels (checking only the first 10 records): Assert.assertEquals(header.getGenotypeSamples().size(), 1, "Wrong number of samples in output vcf header"); Assert.assertEquals(header.getGenotypeSamples().get(0), newSampleName, "Wrong sample name in output vcf header"); int recordCount = 0; while ( iter.hasNext() && recordCount < 10 ) { final VariantContext vcfRecord = iter.next(); Assert.assertEquals(vcfRecord.getSampleNames().size(), 1, "Wrong number of samples in output vcf record"); Assert.assertEquals(vcfRecord.getSampleNames().iterator().next(), newSampleName, "Wrong sample name in output vcf record"); recordCount++; } }
/**
 * Builds a test variant at the given position with reference allele {@code A}, alternate
 * allele {@code ACC}, attribute {@code DP="50"}, and one phased genotype per header sample.
 *
 * <p>Each genotype is built from the alternate allele only, with GQ 0 and attribute
 * {@code BB="1"}.
 *
 * @param header   header supplying the genotype sample names
 * @param chrom    contig of the variant
 * @param position start and stop position of the variant
 * @return the assembled variant context, with source name {@code "RANDOM"}
 */
private VariantContext createVCGeneral(final VCFHeader header, final String chrom, final int position) {
    final List<Allele> alleles = new ArrayList<Allele>();
    alleles.add(Allele.create("A", true));
    alleles.add(Allele.create("ACC", false));

    final Map<String, Object> attributes = new HashMap<String, Object>();
    attributes.put("DP", "50");

    final GenotypesContext genotypes = GenotypesContext.create(header.getGenotypeSamples().size());
    for (final String sampleName : header.getGenotypeSamples()) {
        // subList(1, 2) selects just the alternate allele.
        final GenotypeBuilder genotypeBuilder = new GenotypeBuilder(sampleName, alleles.subList(1, 2));
        genotypes.add(genotypeBuilder.GQ(0).attribute("BB", "1").phased(true).make());
    }

    final VariantContextBuilder vcBuilder = new VariantContextBuilder("RANDOM", chrom, position, position, alleles);
    return vcBuilder.genotypes(genotypes).attributes(attributes).make();
}
/**
 * Checks that a codec configured with a remapped sample name reports that name both in the
 * parsed header and in up to the first 10 decoded records of a single-sample VCF.
 *
 * @throws Exception if the test file cannot be opened, read, or closed
 */
@Test
public void testVCFHeaderSampleRenamingSingleSampleVCF() throws Exception {
    final VCFCodec codec = new VCFCodec();
    codec.setRemappedSampleName("FOOSAMPLE");

    // try-with-resources releases the file handle even when an assertion below fails;
    // previously the stream was never closed.
    try (final FileInputStream vcfStream = new FileInputStream(variantTestDataRoot + "HiSeq.10000.vcf")) {
        final AsciiLineReaderIterator vcfIterator = new AsciiLineReaderIterator(AsciiLineReader.from(vcfStream));
        final VCFHeader header = (VCFHeader) codec.readHeader(vcfIterator).getHeaderValue();

        Assert.assertEquals(header.getNGenotypeSamples(), 1, "Wrong number of samples in remapped header");
        Assert.assertEquals(header.getGenotypeSamples().get(0), "FOOSAMPLE", "Sample name in remapped header has incorrect value");

        int recordCount = 0;
        while (vcfIterator.hasNext() && recordCount < 10) {
            recordCount++;
            final VariantContext vcfRecord = codec.decode(vcfIterator.next());

            Assert.assertEquals(vcfRecord.getSampleNames().size(), 1, "Wrong number of samples in vcf record after remapping");
            Assert.assertEquals(vcfRecord.getSampleNames().iterator().next(), "FOOSAMPLE", "Wrong sample in vcf record after remapping");
        }
    }
}
/**
 * Asserts that two VCF headers agree on column count, genotype samples and contig lines, and
 * that every INFO line of {@code header0} matches the same-ID INFO line of {@code header1}
 * in count, type and description, and compares as equal via {@code compareTo}.
 *
 * <p>Only the INFO lines of {@code header0} are iterated. INFO lines present solely in
 * {@code header1} are not examined.
 *
 * @param header0 header whose INFO lines drive the comparison
 * @param header1 header looked up by INFO line ID
 */
public static void assertHeadersEquals(VCFHeader header0, VCFHeader header1) {
    assertEquals(header0.getColumnCount(), header1.getColumnCount());
    assertEquals(header0.getGenotypeSamples(), header1.getGenotypeSamples());
    assertEquals(header0.getContigLines(), header1.getContigLines());

    for (final VCFInfoHeaderLine infoFrom0 : header0.getInfoHeaderLines()) {
        final VCFInfoHeaderLine infoFrom1 = header1.getInfoHeaderLine(infoFrom0.getID());
        assertEquals(infoFrom0.getCount(), infoFrom1.getCount());
        assertEquals(infoFrom0.getType(), infoFrom1.getType());
        assertEquals(infoFrom0.getDescription(), infoFrom1.getDescription());
        assertEquals(0, infoFrom0.compareTo(infoFrom1));
    }
}
}