/**
 * Configures this instance from a VCF header by delegating to {@code configure}
 * with the header's INFO lines and then with its FORMAT lines.
 *
 * @param header the VCF header whose INFO and FORMAT header lines are consumed
 * @return this instance
 */
public Configuration configure(VCFHeader header) {
    // INFO lines are handed over first, FORMAT lines second.
    this.configure(header.getInfoHeaderLines());
    this.configure(header.getFormatHeaderLines());
    return this;
}
Iterator<VCFFormatHeaderLine> iter = vcfHeader.getFormatHeaderLines().iterator(); while(iter.hasNext()){
/** * Converts INFO, FORMAT, FILTER. * @param headerConverter * @param header * @param vs */ private static void convertHeaders(Converter<VCFHeaderLine, VariantSetMetadata> headerConverter, VCFHeader header, VariantSet vs) { Collection[] coll = new Collection[] { header.getInfoHeaderLines(), header.getFilterLines(), header.getFormatHeaderLines(), // header.getContigLines(), // TODO other formats }; for (Collection<? extends VCFHeaderLine> c : coll) { if (null != c) { for (VCFHeaderLine hl : c) { vs.getMetadata().add(headerConverter.forward(hl)); } } } }
/**
 * Adds a plain key/value header line and checks that it shows up among the "other" header lines
 * and the full metadata set, but in none of the INFO, FORMAT, contig or FILTER collections.
 */
@Test
public void testVCFHeaderAddOtherLine() {
    final VCFHeader vcfHeader = getHiSeqVCFHeader();
    final VCFHeaderLine addedLine = new VCFHeaderLine("TestOtherLine", "val");
    vcfHeader.addMetaDataLine(addedLine);

    // Where the line must be found.
    Assert.assertTrue(
            vcfHeader.getOtherHeaderLines().contains(addedLine),
            "TestOtherLine not found in other header lines");
    Assert.assertTrue(
            vcfHeader.getMetaDataInInputOrder().contains(addedLine),
            "TestOtherLine not found in set of all header lines");
    Assert.assertNotNull(
            vcfHeader.getOtherHeaderLine("TestOtherLine"),
            "Lookup for TestOtherLine by key failed");

    // Where the line must not be found.
    Assert.assertFalse(
            vcfHeader.getInfoHeaderLines().contains(addedLine),
            "TestOtherLine present in info header lines");
    Assert.assertFalse(
            vcfHeader.getFormatHeaderLines().contains(addedLine),
            "TestOtherLine present in format header lines");
    Assert.assertFalse(
            vcfHeader.getContigLines().contains(addedLine),
            "TestOtherLine present in contig header lines");
    Assert.assertFalse(
            vcfHeader.getFilterLines().contains(addedLine),
            "TestOtherLine present in filter header lines");
}
/**
 * Adds a contig header line and checks that it shows up among the contig lines and the full
 * metadata set, but in none of the INFO, FORMAT, FILTER or "other" collections.
 */
@Test
public void testVCFHeaderAddContigLine() {
    final VCFHeader vcfHeader = getHiSeqVCFHeader();
    final String contigText =
            "<ID=chr1,length=1234567890,assembly=FAKE,md5=f126cdf8a6e0c7f379d618ff66beb2da,species=\"Homo sapiens\">";
    final VCFContigHeaderLine addedLine =
            new VCFContigHeaderLine(contigText, VCFHeaderVersion.VCF4_0, VCFHeader.CONTIG_KEY, 0);
    vcfHeader.addMetaDataLine(addedLine);

    // Where the line must be found.
    Assert.assertTrue(
            vcfHeader.getContigLines().contains(addedLine),
            "Test contig line not found in contig header lines");
    Assert.assertTrue(
            vcfHeader.getMetaDataInInputOrder().contains(addedLine),
            "Test contig line not found in set of all header lines");

    // Where the line must not be found.
    Assert.assertFalse(
            vcfHeader.getInfoHeaderLines().contains(addedLine),
            "Test contig line present in info header lines");
    Assert.assertFalse(
            vcfHeader.getFormatHeaderLines().contains(addedLine),
            "Test contig line present in format header lines");
    Assert.assertFalse(
            vcfHeader.getFilterLines().contains(addedLine),
            "Test contig line present in filter header lines");
    Assert.assertFalse(
            vcfHeader.getOtherHeaderLines().contains(addedLine),
            "Test contig line present in other header lines");
}
/**
 * Adds an INFO header line and checks that it shows up among the INFO lines, the full metadata set
 * and the by-ID lookup, but in none of the FORMAT, FILTER, contig or "other" collections.
 */
@Test
public void testVCFHeaderAddInfoLine() {
    final VCFHeader vcfHeader = getHiSeqVCFHeader();
    final VCFInfoHeaderLine addedLine = new VCFInfoHeaderLine(
            "TestInfoLine", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "test info line");
    vcfHeader.addMetaDataLine(addedLine);

    // Where the line must be found.
    Assert.assertTrue(
            vcfHeader.getInfoHeaderLines().contains(addedLine),
            "TestInfoLine not found in info header lines");
    Assert.assertTrue(
            vcfHeader.getMetaDataInInputOrder().contains(addedLine),
            "TestInfoLine not found in set of all header lines");
    Assert.assertNotNull(
            vcfHeader.getInfoHeaderLine("TestInfoLine"),
            "Lookup for TestInfoLine by key failed");

    // Where the line must not be found.
    Assert.assertFalse(
            vcfHeader.getFormatHeaderLines().contains(addedLine),
            "TestInfoLine present in format header lines");
    Assert.assertFalse(
            vcfHeader.getFilterLines().contains(addedLine),
            "TestInfoLine present in filter header lines");
    Assert.assertFalse(
            vcfHeader.getContigLines().contains(addedLine),
            "TestInfoLine present in contig header lines");
    Assert.assertFalse(
            vcfHeader.getOtherHeaderLines().contains(addedLine),
            "TestInfoLine present in other header lines");
}
/**
 * Adds a FORMAT header line and checks that it shows up among the FORMAT lines, the full metadata
 * set and the by-ID lookup, but in none of the INFO, FILTER, contig or "other" collections.
 */
@Test
public void testVCFHeaderAddFormatLine() {
    final VCFHeader vcfHeader = getHiSeqVCFHeader();
    final VCFFormatHeaderLine addedLine = new VCFFormatHeaderLine(
            "TestFormatLine", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "test format line");
    vcfHeader.addMetaDataLine(addedLine);

    // Where the line must be found.
    Assert.assertTrue(
            vcfHeader.getFormatHeaderLines().contains(addedLine),
            "TestFormatLine not found in format header lines");
    Assert.assertTrue(
            vcfHeader.getMetaDataInInputOrder().contains(addedLine),
            "TestFormatLine not found in set of all header lines");
    Assert.assertNotNull(
            vcfHeader.getFormatHeaderLine("TestFormatLine"),
            "Lookup for TestFormatLine by key failed");

    // Where the line must not be found.
    Assert.assertFalse(
            vcfHeader.getInfoHeaderLines().contains(addedLine),
            "TestFormatLine present in info header lines");
    Assert.assertFalse(
            vcfHeader.getFilterLines().contains(addedLine),
            "TestFormatLine present in filter header lines");
    Assert.assertFalse(
            vcfHeader.getContigLines().contains(addedLine),
            "TestFormatLine present in contig header lines");
    Assert.assertFalse(
            vcfHeader.getOtherHeaderLines().contains(addedLine),
            "TestFormatLine present in other header lines");
}
/**
 * Adds a FILTER header line and checks that it keeps its description, shows up among the FILTER
 * lines, the full metadata set and the by-ID lookup, and is absent from the INFO, FORMAT, contig
 * and "other" collections.
 */
@Test
public void testVCFHeaderAddFilterLine() {
    final VCFHeader header = getHiSeqVCFHeader();
    final String filterDesc = "TestFilterLine Description";
    final VCFFilterHeaderLine filterLine = new VCFFilterHeaderLine("TestFilterLine", filterDesc);

    // Assert takes (actual, expected): the value under test goes first so that a failure
    // message reports the two values the right way round.
    Assert.assertEquals(filterLine.getDescription(), filterDesc);
    header.addMetaDataLine(filterLine);

    // Where the line must be found.
    Assert.assertTrue(header.getFilterLines().contains(filterLine), "TestFilterLine not found in filter header lines");
    Assert.assertTrue(header.getMetaDataInInputOrder().contains(filterLine), "TestFilterLine not found in set of all header lines");
    Assert.assertNotNull(header.getFilterHeaderLine("TestFilterLine"), "Lookup for TestFilterLine by key failed");

    // Where the line must not be found.
    Assert.assertFalse(header.getInfoHeaderLines().contains(filterLine), "TestFilterLine present in info header lines");
    Assert.assertFalse(header.getFormatHeaderLines().contains(filterLine), "TestFilterLine present in format header lines");
    Assert.assertFalse(header.getContigLines().contains(filterLine), "TestFilterLine present in contig header lines");
    Assert.assertFalse(header.getOtherHeaderLines().contains(filterLine), "TestFilterLine present in other header lines");
}
/**
 * Registers one field writer per INFO line and one per FORMAT line of the given VCF header.
 *
 * Must be called before any of the getter methods will work
 *
 * @param header a VCFHeader containing description for every INFO and FORMAT field we'll attempt to write out to BCF
 * @param encoder the encoder we are going to use to write out the BCF2 data
 * @param stringDictionary a map from VCFHeader strings to their offsets for encoding
 */
public void setup(final VCFHeader header,
                  final BCF2Encoder encoder,
                  final Map<String, Integer> stringDictionary) {
    // Site-level writers, keyed by the ID of each INFO line.
    for (final VCFInfoHeaderLine infoLine : header.getInfoHeaderLines()) {
        add(siteWriters, infoLine.getID(), createInfoWriter(header, infoLine, encoder, stringDictionary));
    }

    // Genotype-level writers, keyed by the ID of each FORMAT line.
    for (final VCFFormatHeaderLine formatLine : header.getFormatHeaderLines()) {
        add(genotypesWriters, formatLine.getID(), createGenotypesWriter(header, formatLine, encoder, stringDictionary));
    }
}
/**
 * Reads the header of the HiSeq test VCF, round-trips it through
 * {@code TestUtil.serializeAndDeserialize}, and checks that every accessor compared below
 * returns equal values for the original and the deserialized header.
 */
@Test
public void testVCFHeaderSerialization() throws Exception {
    final VCFHeader originalHeader;
    // try-with-resources guarantees the reader is closed even if reading the header throws.
    try (final VCFFileReader reader = new VCFFileReader(new File("src/test/resources/htsjdk/variant/HiSeq.10000.vcf"), false)) {
        originalHeader = reader.getFileHeader();
    }

    final VCFHeader deserializedHeader = TestUtil.serializeAndDeserialize(originalHeader);

    // Header line collections.
    Assert.assertEquals(deserializedHeader.getMetaDataInInputOrder(), originalHeader.getMetaDataInInputOrder(), "Header metadata does not match before/after serialization");
    Assert.assertEquals(deserializedHeader.getContigLines(), originalHeader.getContigLines(), "Contig header lines do not match before/after serialization");
    Assert.assertEquals(deserializedHeader.getFilterLines(), originalHeader.getFilterLines(), "Filter header lines do not match before/after serialization");
    Assert.assertEquals(deserializedHeader.getFormatHeaderLines(), originalHeader.getFormatHeaderLines(), "Format header lines do not match before/after serialization");
    Assert.assertEquals(deserializedHeader.getIDHeaderLines(), originalHeader.getIDHeaderLines(), "ID header lines do not match before/after serialization");
    Assert.assertEquals(deserializedHeader.getInfoHeaderLines(), originalHeader.getInfoHeaderLines(), "Info header lines do not match before/after serialization");
    Assert.assertEquals(deserializedHeader.getOtherHeaderLines(), originalHeader.getOtherHeaderLines(), "Other header lines do not match before/after serialization");

    // Sample bookkeeping.
    Assert.assertEquals(deserializedHeader.getGenotypeSamples(), originalHeader.getGenotypeSamples(), "Genotype samples not the same before/after serialization");
    Assert.assertEquals(deserializedHeader.samplesWereAlreadySorted(), originalHeader.samplesWereAlreadySorted(), "Sortedness of samples not the same before/after serialization");
    Assert.assertEquals(deserializedHeader.getSampleNamesInOrder(), originalHeader.getSampleNamesInOrder(), "Sorted list of sample names in header not the same before/after serialization");
    Assert.assertEquals(deserializedHeader.getSampleNameToOffset(), originalHeader.getSampleNameToOffset(), "Sample name to offset map not the same before/after serialization");

    // Textual form.
    Assert.assertEquals(deserializedHeader.toString(), originalHeader.toString(), "String representation of header not the same before/after serialization");
}
.filter(id -> !existingHeader.hasInfoLine(id)) .forEach(id -> log.info("INFO line found in HEADER will be added to OUTPUT: " + id)); outHeader.getFormatHeaderLines() .stream() .map(VCFFormatHeaderLine::getID)
/**
 * Registers one field writer per INFO line and one per FORMAT line of the given VCF header.
 *
 * Must be called before any of the getter methods will work
 *
 * @param header a VCFHeader containing description for every INFO and FORMAT field we'll attempt to write out to BCF
 * @param encoder the encoder we are going to use to write out the BCF2 data
 * @param stringDictionary a map from VCFHeader strings to their offsets for encoding
 */
public void setup(final VCFHeader header,
                  final BCF2Encoder encoder,
                  final Map<String, Integer> stringDictionary) {
    // Site-level writers, keyed by the ID of each INFO line.
    for (final VCFInfoHeaderLine infoLine : header.getInfoHeaderLines()) {
        add(siteWriters, infoLine.getID(), createInfoWriter(header, infoLine, encoder, stringDictionary));
    }

    // Genotype-level writers, keyed by the ID of each FORMAT line.
    for (final VCFFormatHeaderLine formatLine : header.getFormatHeaderLines()) {
        add(genotypesWriters, formatLine.getID(), createGenotypesWriter(header, formatLine, encoder, stringDictionary));
    }
}
/**
 * Registers one field writer per INFO line and one per FORMAT line of the given VCF header.
 *
 * Must be called before any of the getter methods will work
 *
 * @param header a VCFHeader containing description for every INFO and FORMAT field we'll attempt to write out to BCF
 * @param encoder the encoder we are going to use to write out the BCF2 data
 * @param stringDictionary a map from VCFHeader strings to their offsets for encoding
 */
public void setup(final VCFHeader header,
                  final BCF2Encoder encoder,
                  final Map<String, Integer> stringDictionary) {
    // Site-level writers, keyed by the ID of each INFO line.
    for (final VCFInfoHeaderLine infoLine : header.getInfoHeaderLines()) {
        add(siteWriters, infoLine.getID(), createInfoWriter(header, infoLine, encoder, stringDictionary));
    }

    // Genotype-level writers, keyed by the ID of each FORMAT line.
    for (final VCFFormatHeaderLine formatLine : header.getFormatHeaderLines()) {
        add(genotypesWriters, formatLine.getID(), createGenotypesWriter(header, formatLine, encoder, stringDictionary));
    }
}
private void runFixVcfHeader(final int checkFirstNRecords, final File replacementHeader, final boolean enforceSampleSamples) throws IOException { final FixVcfHeader program = new FixVcfHeader(); final File outputVcf = VcfTestUtils.createTemporaryIndexedFile("output.", ".vcf"); program.INPUT = INPUT_VCF; program.OUTPUT = outputVcf; if (replacementHeader == null) { program.CHECK_FIRST_N_RECORDS = checkFirstNRecords; } else { program.HEADER = replacementHeader; program.ENFORCE_SAME_SAMPLES = enforceSampleSamples; } Assert.assertEquals(program.instanceMain(new String[0]), 0); final VCFFileReader actualReader = new VCFFileReader(OUTPUT_VCF, false); final VCFFileReader expectedReader = new VCFFileReader(outputVcf, false); // Check that the headers match (order does not matter final VCFHeader actualHeader = actualReader.getFileHeader(); final VCFHeader expectedHeader = expectedReader.getFileHeader(); Assert.assertEquals(actualHeader.getFilterLines().size(), expectedHeader.getFilterLines().size()); Assert.assertEquals(actualHeader.getInfoHeaderLines().size(), expectedHeader.getInfoHeaderLines().size()); Assert.assertEquals(actualHeader.getFormatHeaderLines().size(), expectedHeader.getFormatHeaderLines().size()); // Check the number of records match, since we don't touch them Assert.assertEquals(actualReader.iterator().stream().count(), expectedReader.iterator().stream().count(), "The wrong number of variants was found."); CloserUtil.close(actualReader); CloserUtil.close(expectedReader); }
.filter(id -> !existingHeader.hasInfoLine(id)) .forEach(id -> log.info("INFO line found in HEADER will be added to OUTPUT: " + id)); outHeader.getFormatHeaderLines() .stream() .map(VCFFormatHeaderLine::getID)