/**
 * Installs the header-derived state of this object: the genotype field decoders,
 * the field dictionary, one {@link GenotypeBuilder} per genotype sample, and the
 * sample-name lookups taken from the header.
 *
 * @param header the VCF header to derive the state from
 */
@Override
public void setHeader(VCFHeader header) {
    genoFieldDecoders = new BCF2GenotypeFieldDecoders(header);
    fieldDict = BCF2Utils.makeDictionary(header);

    // One builder per genotype sample, named after the sample at the same index.
    builders = new GenotypeBuilder[header.getNGenotypeSamples()];
    final List<String> sampleNames = header.getGenotypeSamples();
    int slot = 0;
    while (slot < builders.length) {
        builders[slot] = new GenotypeBuilder(sampleNames.get(slot));
        slot++;
    }

    sampleNamesInOrder = header.getSampleNamesInOrder();
    sampleNameToOffset = header.getSampleNameToOffset();
}
/**
 * Creates a new header in which the contig header lines have been added / replaced using
 * the given reference file and reference sequence dictionary. The genotype samples of the
 * old header are carried over to the new one.
 *
 * @param oldHeader     the header to update
 * @param referenceFile the file path to the reference sequence used to generate this vcf
 * @param refDict       the SAM formatted reference sequence dictionary
 * @return a new {@link VCFHeader} built from the updated header lines and the old header's genotype samples
 */
public static VCFHeader withUpdatedContigs(final VCFHeader oldHeader,
                                           final File referenceFile,
                                           final SAMSequenceDictionary refDict) {
    final VCFHeader updatedHeader = new VCFHeader(
            withUpdatedContigsAsLines(oldHeader.getMetaDataInInputOrder(), referenceFile, refDict),
            oldHeader.getGenotypeSamples());
    return updatedHeader;
}
/**
 * Adds the POPMAX INFO header line to the given header.
 *
 * @param header    header that receives the new line
 * @param prefix    leading part of the line ID
 * @param idInfix   middle part of the line ID, placed between {@code prefix} and {@code "POPMAX"}
 * @param noteInfix text appended to the line description
 */
private void addPopmaxHeader(VCFHeader header, String prefix, String idInfix, String noteInfix) {
    final String lineId = prefix + idInfix + "POPMAX";
    final String description = "Population with the max AF" + noteInfix;
    header.addMetaDataLine(
            new VCFInfoHeaderLine(lineId, VCFHeaderLineCount.A, VCFHeaderLineType.String, description));
}
/**
 * Sets the header used by this writer and rebuilds the encoder for it.
 * When {@code doNotWriteGenotypes} is set, a new header built only from the sorted
 * metadata lines of {@code header} is stored instead of {@code header} itself.
 *
 * @param header the header to use
 * @throws IllegalStateException if {@code outputHasBeenWritten} is already set
 */
@Override
public void setHeader(final VCFHeader header) {
    if (outputHasBeenWritten) {
        throw new IllegalStateException("The header cannot be modified after the header or variants have been written to the output stream.");
    }

    if (doNotWriteGenotypes) {
        this.mHeader = new VCFHeader(header.getMetaDataInSortedOrder());
    } else {
        this.mHeader = header;
    }

    this.vcfEncoder = new VCFEncoder(this.mHeader, this.allowMissingFieldsInHeader, this.writeFullFormatField);
}
}
/**
 * Looks up the header line describing {@code field}, trying the FORMAT lines first and
 * the INFO lines second.
 *
 * If neither exists and {@code ASSUME_MISSING_FIELDS_ARE_STRINGS} is set, an auto-generated
 * unbounded String INFO line is returned; the first time a given field is missing it is
 * recorded in {@code MISSING_KEYS_WARNED_ABOUT} and, in debug mode, a warning is printed
 * to stderr.
 *
 * @param header the header to search
 * @param field  the field key to look up
 * @return the matching header line, or an auto-generated String line for a missing field
 * @throws TribbleException if the field is missing and missing fields are not assumed to be strings
 */
public final static VCFCompoundHeaderLine getMetaDataForField(final VCFHeader header, final String field) {
    final VCFCompoundHeaderLine formatLine = header.getFormatHeaderLine(field);
    if ( formatLine != null ) {
        return formatLine;
    }

    final VCFCompoundHeaderLine infoLine = header.getInfoHeaderLine(field);
    if ( infoLine != null ) {
        return infoLine;
    }

    if ( ! ASSUME_MISSING_FIELDS_ARE_STRINGS ) {
        throw new TribbleException("Fully decoding VariantContext requires header line for all fields, but none was found for " + field);
    }

    // Warn at most once per missing key.
    if ( ! MISSING_KEYS_WARNED_ABOUT.contains(field) ) {
        MISSING_KEYS_WARNED_ABOUT.add(field);
        if ( GeneralUtils.DEBUG_MODE_ENABLED ) {
            System.err.println("Field " + field + " missing from VCF header, assuming it is an unbounded string type");
        }
    }
    return new VCFInfoHeaderLine(field, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Auto-generated string header for " + field);
}
/**
 * Adds a contig line to a header and checks that it shows up among the contig lines and
 * the full metadata set, but in none of the info / format / filter / other line collections.
 */
@Test
public void testVCFHeaderAddContigLine() {
    final VCFHeader header = getHiSeqVCFHeader();

    final String contigFields =
            "<ID=chr1,length=1234567890,assembly=FAKE,md5=f126cdf8a6e0c7f379d618ff66beb2da,species=\"Homo sapiens\">";
    final VCFContigHeaderLine contigLine =
            new VCFContigHeaderLine(contigFields, VCFHeaderVersion.VCF4_0, VCFHeader.CONTIG_KEY, 0);
    header.addMetaDataLine(contigLine);

    // The line must be visible where contig lines are expected...
    Assert.assertTrue(header.getContigLines().contains(contigLine),
            "Test contig line not found in contig header lines");
    Assert.assertTrue(header.getMetaDataInInputOrder().contains(contigLine),
            "Test contig line not found in set of all header lines");

    // ...and absent from every other typed view of the header.
    Assert.assertFalse(header.getInfoHeaderLines().contains(contigLine),
            "Test contig line present in info header lines");
    Assert.assertFalse(header.getFormatHeaderLines().contains(contigLine),
            "Test contig line present in format header lines");
    Assert.assertFalse(header.getFilterLines().contains(contigLine),
            "Test contig line present in filter header lines");
    Assert.assertFalse(header.getOtherHeaderLines().contains(contigLine),
            "Test contig line present in other header lines");
}
/**
 * Round-trips a header read from a test VCF through serialization and checks that every
 * accessor compared below returns equal values before and after.
 *
 * @throws Exception if reading the file or the serialization round trip fails
 */
@Test
public void testVCFHeaderSerialization() throws Exception {
    final VCFHeader originalHeader;
    // try-with-resources so the reader is closed even if reading the header throws
    try (final VCFFileReader reader = new VCFFileReader(new File("src/test/resources/htsjdk/variant/HiSeq.10000.vcf"), false)) {
        originalHeader = reader.getFileHeader();
    }

    final VCFHeader deserializedHeader = TestUtil.serializeAndDeserialize(originalHeader);

    Assert.assertEquals(deserializedHeader.getMetaDataInInputOrder(), originalHeader.getMetaDataInInputOrder(), "Header metadata does not match before/after serialization");
    Assert.assertEquals(deserializedHeader.getContigLines(), originalHeader.getContigLines(), "Contig header lines do not match before/after serialization");
    Assert.assertEquals(deserializedHeader.getFilterLines(), originalHeader.getFilterLines(), "Filter header lines do not match before/after serialization");
    Assert.assertEquals(deserializedHeader.getFormatHeaderLines(), originalHeader.getFormatHeaderLines(), "Format header lines do not match before/after serialization");
    Assert.assertEquals(deserializedHeader.getIDHeaderLines(), originalHeader.getIDHeaderLines(), "ID header lines do not match before/after serialization");
    Assert.assertEquals(deserializedHeader.getInfoHeaderLines(), originalHeader.getInfoHeaderLines(), "Info header lines do not match before/after serialization");
    Assert.assertEquals(deserializedHeader.getOtherHeaderLines(), originalHeader.getOtherHeaderLines(), "Other header lines do not match before/after serialization");
    Assert.assertEquals(deserializedHeader.getGenotypeSamples(), originalHeader.getGenotypeSamples(), "Genotype samples not the same before/after serialization");
    Assert.assertEquals(deserializedHeader.samplesWereAlreadySorted(), originalHeader.samplesWereAlreadySorted(), "Sortedness of samples not the same before/after serialization");
    Assert.assertEquals(deserializedHeader.getSampleNamesInOrder(), originalHeader.getSampleNamesInOrder(), "Sorted list of sample names in header not the same before/after serialization");
    Assert.assertEquals(deserializedHeader.getSampleNameToOffset(), originalHeader.getSampleNameToOffset(), "Sample name to offset map not the same before/after serialization");
    Assert.assertEquals(deserializedHeader.toString(), originalHeader.toString(), "String representation of header not the same before/after serialization");
}
outHeader = new VCFHeader(inputHeader.getMetaDataInInputOrder(), existingHeader.getSampleNamesInOrder()); outHeader.getFilterLines() .stream() .map(VCFFilterHeaderLine::getID) .filter(id -> !existingHeader.hasFilterLine(id)) .forEach(id -> log.info("FILTER line found in HEADER will be added to OUTPUT: " + id)); outHeader.getInfoHeaderLines() .stream() .map(VCFInfoHeaderLine::getID) .filter(id -> !existingHeader.hasInfoLine(id)) .forEach(id -> log.info("INFO line found in HEADER will be added to OUTPUT: " + id)); outHeader.getFormatHeaderLines() .stream() .map(VCFFormatHeaderLine::getID) .filter(id -> !existingHeader.hasInfoLine(id)) .forEach(id -> log.info("FORMAT line found in HEADER will be added to OUTPUT: " + id)); if (!existingHeader.hasFilterLine(filter) && !filterHeaderLines.containsKey(filter)) { log.info("Will add an FILTER line with id: " + filter); filterHeaderLines.put(filter, new VCFFilterHeaderLine(filter, "Missing description: this FILTER line was added by Picard's FixVCFHeader")); if (!existingHeader.hasInfoLine(id) && !infoHeaderLines.containsKey(id)) { log.info("Will add an INFO line with id: " + id); infoHeaderLines.put(id, new VCFInfoHeaderLine(id, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Missing description: this INFO line was added by Picard's FixVCFHeader")); for (final Map.Entry<String, Object> attribute : genotype.getExtendedAttributes().entrySet()) { final String id = attribute.getKey(); if (!existingHeader.hasFormatLine(id) && !formatHeaderLines.containsKey(id)) {
/**
 * Asserts that two headers agree on column count, genotype samples and contig lines, and
 * that every INFO line of {@code header0} has an equivalent INFO line (same count, type,
 * description and a zero {@code compareTo}) in {@code header1}.
 *
 * NOTE(review): only the INFO lines of {@code header0} are iterated, and if
 * {@code header1.getInfoHeaderLine} returns null for an ID the comparison presumably fails
 * with a NullPointerException rather than an assertion error — confirm this is intended.
 *
 * @param header0 the header whose INFO lines drive the comparison
 * @param header1 the header compared against {@code header0}
 */
public static void assertHeadersEquals(VCFHeader header0, VCFHeader header1) {
    // Header-wide properties.
    assertEquals(header0.getColumnCount(), header1.getColumnCount());
    assertEquals(header0.getGenotypeSamples(), header1.getGenotypeSamples());
    assertEquals(header0.getContigLines(), header1.getContigLines());

    // Per-INFO-line properties, matched by ID.
    for (VCFInfoHeaderLine first : header0.getInfoHeaderLines()) {
        VCFInfoHeaderLine second = header1.getInfoHeaderLine(first.getID());
        assertEquals(first.getCount(), second.getCount());
        assertEquals(first.getType(), second.getType());
        assertEquals(first.getDescription(), second.getDescription());
        assertEquals(0, first.compareTo(second));
    }
}
}
@Test(dataProvider = "testVCFHeaderDictionaryMergingData") public void testVCFHeaderDictionaryMerging(final String vcfFileName) { final VCFHeader headerOne = new VCFFileReader(new File(variantTestDataRoot + vcfFileName), false).getFileHeader(); final VCFHeader headerTwo = new VCFHeader(headerOne); // deep copy final List<String> sampleList = new ArrayList<String>(); sampleList.addAll(headerOne.getSampleNamesInOrder()); // Check that the two dictionaries start out the same headerOne.getSequenceDictionary().assertSameDictionary(headerTwo.getSequenceDictionary()); // Run the merge command final VCFHeader mergedHeader = new VCFHeader(VCFUtils.smartMergeHeaders(Arrays.asList(headerOne, headerTwo), false), sampleList); // Check that the mergedHeader's sequence dictionary matches the first two mergedHeader.getSequenceDictionary().assertSameDictionary(headerOne.getSequenceDictionary()); }
/**
 * Resets the conversion counter and the parse / convert / encode timers to zero and
 * clears the {@code postDone} flag.
 */
@Override
public void pre() {
    numConverts.set(0);
    parseTime.set(0);
    convertTime.set(0);
    encodeTime.set(0);
    postDone.set(false);
    // The former locals gtSize and genotypeSamples were assigned but never read, so they were removed.
}
/**
 * Builds a header containing a single INFO line for {@code RANDOM_ATTRIBUTE}
 * (count 1, type Character).
 *
 * @return the newly created test header
 */
private static VCFHeader createTestHeader() {
    final VCFInfoHeaderLine randomAttributeLine =
            new VCFInfoHeaderLine(RANDOM_ATTRIBUTE, 1, VCFHeaderLineType.Character, "random string");

    final VCFHeader testHeader = new VCFHeader();
    testHeader.addMetaDataLine(randomAttributeLine);
    return testHeader;
}
/**
 * Returns the sample names of the underlying header, as given by
 * {@code header.getSampleNamesInOrder()}.
 *
 * @return the header's sample names in order
 */
@Override
public List<String> getSampleNames() {
    final List<String> namesInOrder = header.getSampleNamesInOrder();
    return namesInOrder;
}
// Reads the header and sequence dictionary of `input`, falls back to / validates against samSequenceDictionary, and compares the header's sample names with sampleList.
// NOTE(review): this span looks like an excerpt with elided lines (its braces do not balance), and `in` is not closed within it — confirm the surrounding code closes the reader.
final VCFFileReader in = new VCFFileReader(input, false); final VCFHeader header = in.getFileHeader(); final SAMSequenceDictionary dict = in.getFileHeader().getSequenceDictionary(); if (dict == null || dict.isEmpty()) { if (null == samSequenceDictionary) { throw new IllegalArgumentException("Sequence dictionary was missing or empty for the VCF: " + input.getAbsolutePath() + " Please add a sequence dictionary to this VCF or specify SEQUENCE_DICTIONARY."); header.setSequenceDictionary(samSequenceDictionary); } else { if (null == samSequenceDictionary) { sampleList.addAll(header.getSampleNamesInOrder()); } else { if (!sampleList.equals(header.getSampleNamesInOrder())) { throw new IllegalArgumentException("Input file " + input.getAbsolutePath() + " has sample names that don't match the other files.");
/**
 * Returns the SAMSequenceDictionary from the provided VCF file.
 * The reader opened on {@code path} is closed before this method returns.
 *
 * @param path path to the VCF file to read the header from
 * @return the sequence dictionary reported by the file's header
 */
public static SAMSequenceDictionary getSequenceDictionary(final Path path) {
    // try-with-resources: the reader was previously left open (resource leak)
    try (final VCFFileReader reader = new VCFFileReader(path, false)) {
        return reader.getFileHeader().getSequenceDictionary();
    }
}
/**
 * Repairs a header containing only {@code cfg.original} and checks that the repaired line
 * (looked up as FORMAT first, then INFO) has the expected ID, type and count type, and —
 * when the count type is INTEGER — the expected count.
 *
 * @param cfg the original line and the expected repaired result
 */
@Test(dataProvider = "RepairHeaderTest")
public void testRepairHeaderTest(final RepairHeaderTest cfg) {
    final VCFHeader toRepair = new VCFHeader(Collections.singleton((VCFHeaderLine)cfg.original));
    final VCFHeader repaired = VCFStandardHeaderLines.repairStandardHeaderLines(toRepair);

    // Prefer the FORMAT line; fall back to the INFO line with the same ID.
    final VCFCompoundHeaderLine formatLine = (VCFCompoundHeaderLine)repaired.getFormatHeaderLine(cfg.original.getID());
    final VCFCompoundHeaderLine repairedLine = formatLine != null
            ? formatLine
            : (VCFCompoundHeaderLine)repaired.getInfoHeaderLine(cfg.original.getID());

    Assert.assertNotNull(repairedLine, "Repaired header didn't contain the expected line");
    Assert.assertEquals(repairedLine.getID(), cfg.expectedResult.getID());
    Assert.assertEquals(repairedLine.getType(), cfg.expectedResult.getType());
    Assert.assertEquals(repairedLine.getCountType(), cfg.expectedResult.getCountType());

    final boolean hasFixedCount = repairedLine.getCountType() == VCFHeaderLineCount.INTEGER;
    if ( hasFixedCount ) {
        Assert.assertEquals(repairedLine.getCount(), cfg.expectedResult.getCount());
    }
}
}
/**
 * Gets a VCF file ready to write variants to.
 * Delegates to {@code getVariantContextWriter(File, VCFHeader, boolean)}, passing a newly
 * constructed {@link VCFHeader}.
 * It is the responsibility of the caller to close the returned {@link VariantContextWriter}.
 *
 * @param file output file
 * @param createIndex passed through unchanged to the three-argument overload
 * @return opened output VCF stream
 */
public VariantContextWriter getVariantContextWriter(File file, boolean createIndex) {
    final VCFHeader header = new VCFHeader();
    return getVariantContextWriter(file, header, createIndex);
}

/**
/**
 * Configures this object from the INFO header lines and then the FORMAT header lines
 * of the given header.
 *
 * @param header the VCF header supplying the INFO and FORMAT lines
 * @return this configuration, to allow call chaining
 */
public Configuration configure(VCFHeader header) {
    // INFO first, FORMAT second — same order as before.
    this.configure(header.getInfoHeaderLines());
    this.configure(header.getFormatHeaderLines());
    return this;
}
@Test public void testVCFHeaderAddDuplicateContigLine() { File input = new File("src/test/resources/htsjdk/variant/ex2.vcf"); VCFFileReader reader = new VCFFileReader(input, false); VCFHeader header = reader.getFileHeader(); final int numContigLinesBefore = header.getContigLines().size(); // try to readd the first contig line header.addMetaDataLine(header.getContigLines().get(0)); final int numContigLinesAfter = header.getContigLines().size(); // assert that we have the same number of contig lines before and after Assert.assertEquals(numContigLinesBefore, numContigLinesAfter); }
/** * Converts INFO, FORMAT, FILTER. * @param headerConverter * @param header * @param vs */ private static void convertHeaders(Converter<VCFHeaderLine, VariantSetMetadata> headerConverter, VCFHeader header, VariantSet vs) { Collection[] coll = new Collection[] { header.getInfoHeaderLines(), header.getFilterLines(), header.getFormatHeaderLines(), // header.getContigLines(), // TODO other formats }; for (Collection<? extends VCFHeaderLine> c : coll) { if (null != c) { for (VCFHeaderLine hl : c) { vs.getMetadata().add(headerConverter.forward(hl)); } } } }