/**
 * Returns the sample names from the wrapped VCF header, in the order defined by
 * {@link VCFHeader#getSampleNamesInOrder()} (sorted order in htsjdk — confirm against the
 * htsjdk version in use).
 */
@Override public List<String> getSampleNames() { return header.getSampleNamesInOrder(); }
/**
 * Verifies that the reader's header and the supplied input header describe exactly the same
 * samples, in the same order.
 *
 * @param readerHeader header obtained from the VCF reader
 * @param inputHeader  header supplied as input, expected to match the reader's
 * @throws PicardException if the sample counts differ, or if any sample name at a given
 *                         position differs between the two headers
 */
private void enforceSameSamples(final VCFHeader readerHeader, final VCFHeader inputHeader) {
    // Program to the interface; htsjdk returns the names in sorted order.
    final List<String> readerSamples = readerHeader.getSampleNamesInOrder();
    final List<String> inputSamples = inputHeader.getSampleNamesInOrder();
    if (readerSamples.size() != inputSamples.size()) {
        // Include both counts so the failure is actionable (the original message named
        // "the input VCF" twice and gave no numbers).
        throw new PicardException(String.format(
                "The input VCF had a different number of samples (%d) than the input VCF header (%d).",
                inputSamples.size(), readerSamples.size()));
    }
    for (int i = 0; i < readerSamples.size(); i++) {
        if (!readerSamples.get(i).equals(inputSamples.get(i))) {
            // "sample %d" avoids malformed ordinals like "1th" produced by the old "%dth".
            throw new PicardException(String.format(
                    "Mismatch in sample %d: '%s' != '%s'",
                    i, readerSamples.get(i), inputSamples.get(i)));
        }
    }
}
}
/**
 * Checks that both headers carry an identical, identically-ordered sample list and
 * throws a {@link PicardException} describing the first discrepancy found.
 *
 * @param readerHeader header obtained from the VCF reader
 * @param inputHeader  header supplied as input
 */
private void enforceSameSamples(final VCFHeader readerHeader, final VCFHeader inputHeader) {
    final ArrayList<String> readerSamples = readerHeader.getSampleNamesInOrder();
    final ArrayList<String> inputSamples = inputHeader.getSampleNamesInOrder();
    // Size mismatch is reported before any element-wise comparison.
    if (inputSamples.size() != readerSamples.size()) {
        throw new PicardException("The input VCF had a different # of samples than the input VCF header.");
    }
    // Walk both lists in lock step; report the first positional mismatch.
    int index = 0;
    for (final String readerSample : readerSamples) {
        final String inputSample = inputSamples.get(index);
        if (!readerSample.equals(inputSample)) {
            throw new PicardException(String.format("Mismatch in the %dth sample: '%s' != '%s'", index, readerSample, inputSample));
        }
        index++;
    }
}
}
// NOTE(review): mid-method fragment — the first file's samples seed sampleList; every
// subsequent header must then carry an identical (same names, same order) sample list,
// otherwise the merge is rejected.
sampleList.addAll(header.getSampleNamesInOrder()); } else { if (!sampleList.equals(header.getSampleNamesInOrder())) { throw new IllegalArgumentException("Input file " + input.getAbsolutePath() + " has sample names that don't match the other files.");
/**
 * Renders this track's interval features as raw record strings. When the track is VCF and
 * normalized-VCF printing is enabled, each record is expanded to one line per sample via
 * {@code normalizeVcfRecordBySample}; otherwise the raw record is emitted unchanged.
 *
 * @return the (possibly per-sample expanded) record strings, in feature order
 */
@Override
protected List<String> getRecordsAsStrings() {
    final List<String> records = new ArrayList<String>();
    for (final IntervalFeature feature : intervalFeatureList) {
        if (this.getTrackFormat().equals(TrackFormat.VCF) && this.getPrintNormalizedVcf()) {
            // One output line per sample for normalized VCF printing.
            records.addAll(this.normalizeVcfRecordBySample(this.getVcfHeader().getSampleNamesInOrder(), feature.getRaw()));
        } else {
            records.add(feature.getRaw());
        }
    }
    return records;
}
/**
 * Load pedigree from file given in configuration or construct singleton pedigree.
 *
 * @param vcfHeader
 *            {@link VCFHeader}, for checking compatibility and getting sample name in case of
 *            singleton pedigree construction
 * @throws PedParseException
 *             in the case of problems with parsing pedigrees (including an empty PED file)
 * @throws IncompatiblePedigreeException
 *             if no PED file is configured and the VCF does not contain exactly one sample
 */
private Pedigree loadPedigree(VCFHeader vcfHeader) throws PedParseException, IOException, IncompatiblePedigreeException {
    if (options.pathPedFile != null) {
        final PedFileReader pedReader = new PedFileReader(new File(options.pathPedFile));
        final PedFileContents pedContents = pedReader.read();
        // Guard against an empty PED file: without this, get(0) below fails with an
        // uninformative IndexOutOfBoundsException.
        if (pedContents.getIndividuals().isEmpty())
            throw new PedParseException("PED file contains no individuals: " + options.pathPedFile);
        // The pedigree name is taken from the first individual in the file.
        return new Pedigree(pedContents, pedContents.getIndividuals().get(0).getPedigree());
    } else {
        // Singleton construction only makes sense for a single-sample VCF.
        if (vcfHeader.getSampleNamesInOrder().size() != 1)
            throw new IncompatiblePedigreeException(
                    "VCF file does not have exactly one sample but required for singleton pedigree construction");
        final String sampleName = vcfHeader.getSampleNamesInOrder().get(0);
        // Synthesize a one-person pedigree: unknown sex, affected, no parents ("0").
        final PedPerson pedPerson = new PedPerson(sampleName, sampleName, "0", "0", Sex.UNKNOWN, Disease.AFFECTED);
        final PedFileContents pedContents = new PedFileContents(ImmutableList.of(), ImmutableList.of(pedPerson));
        return new Pedigree(pedContents, pedContents.getIndividuals().get(0).getPedigree());
    }
}
/**
 * Creates a VCF-string annotator task.
 *
 * @param header                  VCF header used to configure the codec and the converter
 * @param version                 VCF header version for the codec
 * @param variantAnnotatorList    annotators to run over each variant
 * @param sharedContext           context shared across tasks
 * @param normalize               whether variants are normalized before annotation
 * @param variantNormalizerConfig configuration for the normalizer (built even when
 *                                normalize is false)
 */
public VcfStringAnnotatorTask(VCFHeader header, VCFHeaderVersion version, List<VariantAnnotator> variantAnnotatorList, SharedContext sharedContext, boolean normalize, VariantNormalizer.VariantNormalizerConfig variantNormalizerConfig) { this.vcfCodec = new FullVcfCodec(); this.vcfCodec.setVCFHeader(header, version); this.converter = new VariantContextToVariantConverter("", "", header.getSampleNamesInOrder()); this.variantAnnotatorList = variantAnnotatorList; this.sharedContext = sharedContext; this.normalize = normalize; normalizer = new VariantNormalizer(variantNormalizerConfig); }
/**
 * Installs a new header: rebuilds the genotype-field decoders and the string dictionary,
 * allocates one reusable {@link GenotypeBuilder} per genotype sample, and caches the
 * header's ordered sample names and name-to-offset map.
 */
@Override
public void setHeader(VCFHeader header) {
    genoFieldDecoders = new BCF2GenotypeFieldDecoders(header);
    fieldDict = BCF2Utils.makeDictionary(header);
    builders = new GenotypeBuilder[header.getNGenotypeSamples()];
    final List<String> samples = header.getGenotypeSamples();
    for (int sampleIndex = 0; sampleIndex < builders.length; sampleIndex++) {
        builders[sampleIndex] = new GenotypeBuilder(samples.get(sampleIndex));
    }
    sampleNamesInOrder = header.getSampleNamesInOrder();
    sampleNameToOffset = header.getSampleNameToOffset();
}
/**
 * Installs a new header: rebuilds the genotype-field decoders and BCF2 string dictionary,
 * pre-allocates one GenotypeBuilder per genotype sample, and caches the header's ordered
 * sample names and name-to-offset map for later lazy decoding.
 */
@Override public void setHeader(VCFHeader header) { genoFieldDecoders = new BCF2GenotypeFieldDecoders(header); fieldDict = BCF2Utils.makeDictionary(header); builders = new GenotypeBuilder[header.getNGenotypeSamples()]; final List<String> genotypeSamples = header.getGenotypeSamples(); for (int i = 0; i < builders.length; ++i) builders[i] = new GenotypeBuilder(genotypeSamples.get(i)); sampleNamesInOrder = header.getSampleNamesInOrder(); sampleNameToOffset = header.getSampleNameToOffset(); }
// NOTE(review): mid-method fragment — snapshots every sample name from the input header
// into a hash set (for fast membership checks), then iterates the Mendelian-violation
// metrics; the surrounding method is not visible here.
final Set<String> allSamples = new HashSet<>(inputHeader.get().getSampleNamesInOrder()); final Iterator<MendelianViolationMetrics> trioIterator = metrics.iterator();
/**
 * Replaces the decoder state for a new header: fresh genotype-field decoders, fresh
 * string dictionary, one {@link GenotypeBuilder} per genotype sample, plus cached
 * ordered sample names and the name-to-offset map.
 */
@Override
public void setHeader(VCFHeader header) {
    genoFieldDecoders = new BCF2GenotypeFieldDecoders(header);
    fieldDict = BCF2Utils.makeDictionary(header);
    builders = new GenotypeBuilder[header.getNGenotypeSamples()];
    final List<String> sampleNames = header.getGenotypeSamples();
    for (int idx = 0; idx < builders.length; idx++) {
        builders[idx] = new GenotypeBuilder(sampleNames.get(idx));
    }
    sampleNamesInOrder = header.getSampleNamesInOrder();
    sampleNameToOffset = header.getSampleNameToOffset();
}
// NOTE(review): mid-method fragment — duplicates the pattern above: sample names from
// the input header go into a set, and the Mendelian-violation metrics are iterated next.
final Set<String> allSamples = new HashSet<>(inputHeader.get().getSampleNamesInOrder()); final Iterator<MendelianViolationMetrics> trioIterator = metrics.iterator();
/** * Add a variant file metadata (from VCF file and header) to a given variant study metadata (from study ID). * * @param filename VCF filename (as an ID) * @param vcfHeader VCF header * @param studyId Study ID */ public void addFile(String filename, VCFHeader vcfHeader, String studyId) { // sanity check if (StringUtils.isEmpty(filename)) { logger.error("VCF filename is empty or null: '{}'", filename); return; } if (vcfHeader == null) { logger.error("VCF header is missingDataset not found. Check your study ID: '{}'", studyId); return; } VCFHeaderToVariantFileHeaderConverter headerConverter = new VCFHeaderToVariantFileHeaderConverter(); VariantFileMetadata variantFileMetadata = new VariantFileMetadata(); variantFileMetadata.setId(filename); variantFileMetadata.setSampleIds(vcfHeader.getSampleNamesInOrder()); variantFileMetadata.setHeader(headerConverter.convert(vcfHeader)); addFile(variantFileMetadata, studyId); }
/**
 * Tests that we normalize indels correctly: for each truth/call record pair, allele
 * normalization must be symmetric and yield matching alleles in both directions.
 */
@Test
public void testNormalizeAllelesForIndels() {
    final Path truthVcfPath = Paths.get(TEST_DATA_PATH.getAbsolutePath(), NORMALIZE_ALLELES_TRUTH);
    final Path callVcfPath = Paths.get(TEST_DATA_PATH.getAbsolutePath(), NORMALIZE_ALLELES_CALL);
    // try-with-resources: the original leaked both readers whenever an assertion failed.
    try (final VCFFileReader truthReader = new VCFFileReader(truthVcfPath.toFile(), false);
         final VCFFileReader callReader = new VCFFileReader(callVcfPath.toFile(), false)) {
        final Iterator<VariantContext> truthIterator = truthReader.iterator();
        final Iterator<VariantContext> callIterator = callReader.iterator();
        final String truthSample = truthReader.getFileHeader().getSampleNamesInOrder().get(0);
        final String callSample = callReader.getFileHeader().getSampleNamesInOrder().get(0);
        while (truthIterator.hasNext()) {
            // Fail with a clear message instead of NoSuchElementException when the call
            // VCF has fewer records than the truth VCF.
            Assert.assertTrue(callIterator.hasNext(), "Call VCF has fewer records than truth VCF");
            final VariantContext truthCtx = truthIterator.next();
            final VariantContext callCtx = callIterator.next();
            // Normalization should match in both orientations (truth-vs-call and call-vs-truth).
            {
                final GenotypeConcordance.Alleles alleles = GenotypeConcordance.normalizeAlleles(truthCtx, truthSample, callCtx, callSample, false);
                Assert.assertEquals(alleles.truthAllele1, alleles.callAllele1);
                Assert.assertEquals(alleles.truthAllele2, alleles.callAllele2);
            }
            {
                final GenotypeConcordance.Alleles alleles = GenotypeConcordance.normalizeAlleles(callCtx, callSample, truthCtx, truthSample, false);
                Assert.assertEquals(alleles.truthAllele1, alleles.callAllele1);
                Assert.assertEquals(alleles.truthAllele2, alleles.callAllele2);
            }
        }
    }
}
@Test(dataProvider = "testVCFHeaderDictionaryMergingData") public void testVCFHeaderDictionaryMerging(final String vcfFileName) { final VCFHeader headerOne = new VCFFileReader(new File(variantTestDataRoot + vcfFileName), false).getFileHeader(); final VCFHeader headerTwo = new VCFHeader(headerOne); // deep copy final List<String> sampleList = new ArrayList<String>(); sampleList.addAll(headerOne.getSampleNamesInOrder()); // Check that the two dictionaries start out the same headerOne.getSequenceDictionary().assertSameDictionary(headerTwo.getSequenceDictionary()); // Run the merge command final VCFHeader mergedHeader = new VCFHeader(VCFUtils.smartMergeHeaders(Arrays.asList(headerOne, headerTwo), false), sampleList); // Check that the mergedHeader's sequence dictionary matches the first two mergedHeader.getSequenceDictionary().assertSameDictionary(headerOne.getSequenceDictionary()); }
@Test(dataProvider = "haplotypeMapForWriting") public void testHaplotypeMapWriteToVcf(final HaplotypeMap haplotypeMap) throws Exception { final File temp = File.createTempFile("haplotypeMap", ".vcf"); temp.deleteOnExit(); haplotypeMap.writeAsVcf(temp, TEST_FASTA); final VCFFileReader reader = new VCFFileReader(temp); Assert.assertEquals(reader.getFileHeader().getNGenotypeSamples(), 1, "VCF should have exactly one sample"); Assert.assertEquals(reader.getFileHeader().getSampleNamesInOrder().get(0), HaplotypeMap.HET_GENOTYPE_FOR_PHASING, "VCF sole sample should be " + HaplotypeMap.HET_GENOTYPE_FOR_PHASING); final Iterator<VariantContext> iter = reader.iterator(); final VariantContext first = iter.next(); Assert.assertEquals(first.getContig(), "chr1", "Wrong chromosome on first snp: " + first); Assert.assertEquals(first.getID(), "snp1", "Wrong name on first snp: " + first); Assert.assertEquals(first.getGenotype(0).getExtendedAttribute(VCFConstants.PHASE_SET_KEY), Integer.toString(first.getStart()), "anchor snp should have PS equal to its position " + first); Assert.assertEquals(first.getAttributeAsDouble(VCFConstants.ALLELE_FREQUENCY_KEY, 0D), 1 - 0.15); // because it's swapped w.r.t the reference final VariantContext second = iter.next(); Assert.assertEquals(second.getContig(), "chr1", "Wrong chromosome on second snp: " + second); Assert.assertEquals(second.getID(), "snp2", "Wrong name on second snp: " + second); Assert.assertEquals(second.getGenotype(0).getExtendedAttribute(VCFConstants.PHASE_SET_KEY), Integer.toString(first.getStart()), "Phase set is incorrect on second snp: " + second); Assert.assertEquals(second.getAttributeAsDouble(VCFConstants.ALLELE_FREQUENCY_KEY, 0D), 0.16); final VariantContext third = iter.next(); Assert.assertEquals(third.getContig(), "chr2", "Wrong chromosome on third snp: " + third); Assert.assertEquals(third.getID(), "snp3", "Wrong name on third snp: " + third); Assert.assertFalse 
(third.getGenotype(0).hasAnyAttribute(VCFConstants.PHASE_SET_KEY), "Third snp should not have a phaseset" + third); Assert.assertEquals(third.getAttributeAsDouble(VCFConstants.ALLELE_FREQUENCY_KEY, 0D), 0.2); }
/**
 * Checks that a VCFHeader survives Java serialization: every category of header line,
 * the genotype-sample list (and its sortedness/offsets), and the string rendering must
 * be identical before and after a serialize/deserialize round trip.
 */
@Test
public void testVCFHeaderSerialization() throws Exception {
    // try-with-resources: the original leaked the reader if getFileHeader() threw.
    final VCFHeader originalHeader;
    try (final VCFFileReader reader = new VCFFileReader(new File("src/test/resources/htsjdk/variant/HiSeq.10000.vcf"), false)) {
        originalHeader = reader.getFileHeader();
    }

    final VCFHeader deserializedHeader = TestUtil.serializeAndDeserialize(originalHeader);

    Assert.assertEquals(deserializedHeader.getMetaDataInInputOrder(), originalHeader.getMetaDataInInputOrder(), "Header metadata does not match before/after serialization");
    Assert.assertEquals(deserializedHeader.getContigLines(), originalHeader.getContigLines(), "Contig header lines do not match before/after serialization");
    Assert.assertEquals(deserializedHeader.getFilterLines(), originalHeader.getFilterLines(), "Filter header lines do not match before/after serialization");
    Assert.assertEquals(deserializedHeader.getFormatHeaderLines(), originalHeader.getFormatHeaderLines(), "Format header lines do not match before/after serialization");
    Assert.assertEquals(deserializedHeader.getIDHeaderLines(), originalHeader.getIDHeaderLines(), "ID header lines do not match before/after serialization");
    Assert.assertEquals(deserializedHeader.getInfoHeaderLines(), originalHeader.getInfoHeaderLines(), "Info header lines do not match before/after serialization");
    Assert.assertEquals(deserializedHeader.getOtherHeaderLines(), originalHeader.getOtherHeaderLines(), "Other header lines do not match before/after serialization");
    Assert.assertEquals(deserializedHeader.getGenotypeSamples(), originalHeader.getGenotypeSamples(), "Genotype samples not the same before/after serialization");
    Assert.assertEquals(deserializedHeader.samplesWereAlreadySorted(), originalHeader.samplesWereAlreadySorted(), "Sortedness of samples not the same before/after serialization");
    Assert.assertEquals(deserializedHeader.getSampleNamesInOrder(), originalHeader.getSampleNamesInOrder(), "Sorted list of sample names in header not the same before/after serialization");
    Assert.assertEquals(deserializedHeader.getSampleNameToOffset(), originalHeader.getSampleNameToOffset(), "Sample name to offset map not the same before/after serialization");
    Assert.assertEquals(deserializedHeader.toString(), originalHeader.toString(), "String representation of header not the same before/after serialization");
}
// NOTE(review): mid-method fragment — tail of lazy genotype decoding: the last built
// genotype is appended, then all genotypes are wrapped with the header's ordered sample
// names and name->offset map. The IOException is "unexpected" because the data block was
// already read into memory; it is rethrown as a TribbleException with the cause attached.
genotypes.add(gb.make()); return new LazyGenotypesContext.LazyData(genotypes, codec.getHeader().getSampleNamesInOrder(), codec.getHeader().getSampleNameToOffset()); } catch ( IOException e ) { throw new TribbleException("Unexpected IOException parsing already read genotypes data block", e);
// NOTE(review): mid-method fragment — identical to the lazy-genotype decoding tail above:
// finishes the genotype list, builds LazyData from the header's sample names/offsets, and
// rethrows any (unexpected) IOException as a TribbleException preserving the cause.
genotypes.add(gb.make()); return new LazyGenotypesContext.LazyData(genotypes, codec.getHeader().getSampleNamesInOrder(), codec.getHeader().getSampleNameToOffset()); } catch ( IOException e ) { throw new TribbleException("Unexpected IOException parsing already read genotypes data block", e);
// NOTE(review): mid-method fragment — third copy of the lazy-genotype decoding tail:
// appends the final genotype, packages LazyData (genotypes + ordered sample names +
// name->offset map), and converts an in-memory-parse IOException into a TribbleException.
genotypes.add(gb.make()); return new LazyGenotypesContext.LazyData(genotypes, codec.getHeader().getSampleNamesInOrder(), codec.getHeader().getSampleNameToOffset()); } catch ( IOException e ) { throw new TribbleException("Unexpected IOException parsing already read genotypes data block", e);