private static String getCommandLineKey(final VCFHeader header, final String walkerName) { final Iterator<VCFHeaderLine> existingMetaDataIterator = header.getMetaDataInInputOrder().iterator(); // the command line argument keys are in the format GATK_COMMAND_LINE_KEY.(walker name) final String searchKey = String.format("%s.%s", GATK_COMMAND_LINE_KEY, walkerName); int commandLineKeyCount = 0; VCFHeaderLine line; while ( existingMetaDataIterator.hasNext() ) { line = existingMetaDataIterator.next(); // if we find another key that starts with the same text as the walker if ( line.getKey().startsWith(searchKey) ) commandLineKeyCount++; } // if there are no existing keys with this same walker name, then just return the // GATK_COMMAND_LINE_KEY.(walker name) format if ( commandLineKeyCount == 0 ) return searchKey; // otherwise append the count associated with this new command (existing + 1) else return String.format("%s.%d", searchKey, commandLineKeyCount+1); }
/**
 * Determines the VCF version declared by a header.
 *
 * @param vcfHeader the header whose metadata lines are inspected, in input order
 * @return the version obtained from the value of the first line whose key is
 *         {@code "fileformat"}, or {@code null} when the header has no such line
 */
public static VCFHeaderVersion getVCFHeaderVersion(VCFHeader vcfHeader){
    for (final VCFHeaderLine headerLine : vcfHeader.getMetaDataInInputOrder()) {
        final boolean isFileFormatLine = headerLine.getKey().equals("fileformat");
        if (isFileFormatLine) {
            return VCFHeaderVersion.toHeaderVersion(headerLine.getValue());
        }
    }
    // no fileformat line present
    return null;
}
/**
 * Produces a new {@link VCFHeader} in which every FORMAT and INFO line of the input header has
 * been passed through the corresponding standard-line repair; all other lines are carried
 * over unchanged. Line order follows the input order of the original header.
 *
 * @param header the header whose lines are to be repaired
 * @return a freshly allocated header holding the repaired lines and the genotype samples
 *         of {@code header}
 */
public static VCFHeader repairStandardHeaderLines(final VCFHeader header) {
    final Set<VCFHeaderLine> repairedLines =
            new LinkedHashSet<VCFHeaderLine>(header.getMetaDataInInputOrder().size());

    for ( final VCFHeaderLine original : header.getMetaDataInInputOrder() ) {
        final VCFHeaderLine repaired;
        if ( original instanceof VCFFormatHeaderLine ) {
            repaired = formatStandards.repair((VCFFormatHeaderLine) original);
        } else if ( original instanceof VCFInfoHeaderLine ) {
            repaired = infoStandards.repair((VCFInfoHeaderLine) original);
        } else {
            // neither FORMAT nor INFO: keep the line as it is
            repaired = original;
        }
        repairedLines.add(repaired);
    }

    return new VCFHeader(repairedLines, header.getGenotypeSamples());
}
/**
 * Produces a new {@link VCFHeader} in which every FORMAT and INFO line of the input header has
 * been passed through the corresponding standard-line repair; all other lines are carried
 * over unchanged. Line order follows the input order of the original header.
 *
 * @param header the header whose lines are to be repaired
 * @return a freshly allocated header holding the repaired lines and the genotype samples
 *         of {@code header}
 */
public static VCFHeader repairStandardHeaderLines(final VCFHeader header) {
    final Set<VCFHeaderLine> repairedLines =
            new LinkedHashSet<VCFHeaderLine>(header.getMetaDataInInputOrder().size());

    for ( final VCFHeaderLine original : header.getMetaDataInInputOrder() ) {
        final VCFHeaderLine repaired;
        if ( original instanceof VCFFormatHeaderLine ) {
            repaired = formatStandards.repair((VCFFormatHeaderLine) original);
        } else if ( original instanceof VCFInfoHeaderLine ) {
            repaired = infoStandards.repair((VCFInfoHeaderLine) original);
        } else {
            // neither FORMAT nor INFO: keep the line as it is
            repaired = original;
        }
        repairedLines.add(repaired);
    }

    return new VCFHeader(repairedLines, header.getGenotypeSamples());
}
/**
 * Returns a new header whose lines are the result of running the old header's metadata lines
 * through {@code withUpdatedContigsAsLines} with the given reference file and sequence
 * dictionary, and whose genotype samples are those of the old header.
 *
 * @param oldHeader     the header to update
 * @param referenceFile the file path to the reference sequence used to generate this vcf
 * @param refDict       the SAM formatted reference sequence dictionary
 * @return a newly constructed header carrying the updated lines
 */
public static VCFHeader withUpdatedContigs(final VCFHeader oldHeader, final File referenceFile, final SAMSequenceDictionary refDict) {
    final VCFHeader updatedHeader = new VCFHeader(
            withUpdatedContigsAsLines(oldHeader.getMetaDataInInputOrder(), referenceFile, refDict),
            oldHeader.getGenotypeSamples());
    return updatedHeader;
}
/**
 * Returns a new header whose lines are the result of running the old header's metadata lines
 * through {@code withUpdatedContigsAsLines} with the given reference file and sequence
 * dictionary, and whose genotype samples are those of the old header.
 *
 * @param oldHeader     the header to update
 * @param referenceFile the file path to the reference sequence used to generate this vcf
 * @param refDict       the SAM formatted reference sequence dictionary
 * @return a newly constructed header carrying the updated lines
 */
public static VCFHeader withUpdatedContigs(final VCFHeader oldHeader, final File referenceFile, final SAMSequenceDictionary refDict) {
    final VCFHeader updatedHeader = new VCFHeader(
            withUpdatedContigsAsLines(oldHeader.getMetaDataInInputOrder(), referenceFile, refDict),
            oldHeader.getGenotypeSamples());
    return updatedHeader;
}
/**
 * Returns a new header whose lines are the result of running the old header's metadata lines
 * through {@code withUpdatedContigsAsLines} with the given reference file and sequence
 * dictionary, and whose genotype samples are those of the old header.
 *
 * @param oldHeader     the header to update
 * @param referenceFile the file path to the reference sequence used to generate this vcf
 * @param refDict       the SAM formatted reference sequence dictionary
 * @return a newly constructed header carrying the updated lines
 */
public static VCFHeader withUpdatedContigs(final VCFHeader oldHeader, final File referenceFile, final SAMSequenceDictionary refDict) {
    final VCFHeader updatedHeader = new VCFHeader(
            withUpdatedContigsAsLines(oldHeader.getMetaDataInInputOrder(), referenceFile, refDict),
            oldHeader.getGenotypeSamples());
    return updatedHeader;
}
/**
 * Builds a new VCFHeader from the given one, passing each FORMAT line and each INFO line
 * through the matching standard-line repair and copying every other line unchanged. Lines
 * are kept in the input order of the original header.
 *
 * @param header the header whose lines are to be repaired; declared non-null by the contract
 * @return a freshly allocated header holding the repaired lines and the genotype samples
 *         of {@code header}; declared non-null by the contract
 */
@Requires("header != null")
@Ensures("result != null")
public static VCFHeader repairStandardHeaderLines(final VCFHeader header) {
    final Set<VCFHeaderLine> repairedLines =
            new LinkedHashSet<VCFHeaderLine>(header.getMetaDataInInputOrder().size());

    for ( final VCFHeaderLine original : header.getMetaDataInInputOrder() ) {
        final VCFHeaderLine repaired;
        if ( original instanceof VCFFormatHeaderLine ) {
            repaired = formatStandards.repair((VCFFormatHeaderLine) original);
        } else if ( original instanceof VCFInfoHeaderLine ) {
            repaired = infoStandards.repair((VCFInfoHeaderLine) original);
        } else {
            // neither FORMAT nor INFO: keep the line as it is
            repaired = original;
        }
        repairedLines.add(repaired);
    }

    return new VCFHeader(repairedLines, header.getGenotypeSamples());
}
private static boolean expectedConsistent(final VCFHeader combinationHeader, final int minCounterForInputLines) { final List<Integer> ids = new ArrayList<Integer>(); for ( final VCFHeaderLine line : combinationHeader.getMetaDataInInputOrder() ) { if ( line instanceof VCFIDHeaderLine) { ids.add(Integer.valueOf(((VCFIDHeaderLine) line).getID())); } } // as long as the start contains all of the ids up to minCounterForInputLines in order for ( int i = 0; i < minCounterForInputLines; i++ ) if ( i >= ids.size() || ids.get(i) != i ) return false; return true; }
/**
 * Adds a contig line to the HiSeq test header and verifies that it shows up among the contig
 * lines and the full set of metadata lines, and in none of the other line categories.
 */
@Test
public void testVCFHeaderAddContigLine() {
    final VCFHeader hiSeqHeader = getHiSeqVCFHeader();

    final String contigFields =
            "<ID=chr1,length=1234567890,assembly=FAKE,md5=f126cdf8a6e0c7f379d618ff66beb2da,species=\"Homo sapiens\">";
    final VCFContigHeaderLine addedLine =
            new VCFContigHeaderLine(contigFields, VCFHeaderVersion.VCF4_0, VCFHeader.CONTIG_KEY, 0);
    hiSeqHeader.addMetaDataLine(addedLine);

    // the line must be visible where contig lines belong ...
    Assert.assertTrue(hiSeqHeader.getContigLines().contains(addedLine),
            "Test contig line not found in contig header lines");
    Assert.assertTrue(hiSeqHeader.getMetaDataInInputOrder().contains(addedLine),
            "Test contig line not found in set of all header lines");

    // ... and nowhere else
    Assert.assertFalse(hiSeqHeader.getInfoHeaderLines().contains(addedLine),
            "Test contig line present in info header lines");
    Assert.assertFalse(hiSeqHeader.getFormatHeaderLines().contains(addedLine),
            "Test contig line present in format header lines");
    Assert.assertFalse(hiSeqHeader.getFilterLines().contains(addedLine),
            "Test contig line present in filter header lines");
    Assert.assertFalse(hiSeqHeader.getOtherHeaderLines().contains(addedLine),
            "Test contig line present in other header lines");
}
@Override public void setHeader(VCFHeader header) { VCFHeaderVersion version = null; // Normally AbstractVCFCodec parses the header and thereby sets the // version field. It gets used later on so we need to set it. for (final VCFHeaderLine line : header.getMetaDataInInputOrder()) { if (VCFHeaderVersion.isFormatString(line.getKey())) { version = VCFHeaderVersion.toHeaderVersion(line.getValue()); break; } } codec.setHeaderAndVersion(header, version); }
@Override public void setHeader(VCFHeader header) { VCFHeaderVersion version = null; // Normally AbstractVCFCodec parses the header and thereby sets the // version field. It gets used later on so we need to set it. for (final VCFHeaderLine line : header.getMetaDataInInputOrder()) { if (VCFHeaderVersion.isFormatString(line.getKey())) { version = VCFHeaderVersion.toHeaderVersion(line.getValue()); break; } } codec.setHeaderAndVersion(header, version); }
@Override public void setHeader(VCFHeader header) { VCFHeaderVersion version = null; // Normally AbstractVCFCodec parses the header and thereby sets the // version field. It gets used later on so we need to set it. for (final VCFHeaderLine line : header.getMetaDataInInputOrder()) { if (VCFHeaderVersion.isFormatString(line.getKey())) { version = VCFHeaderVersion.toHeaderVersion(line.getValue()); break; } } codec.setHeaderAndVersion(header, version); }
/**
 * Parses a VCF header from its textual form and asserts that the number of metadata lines
 * read equals {@code VCF4headerStringCount}.
 *
 * @param headerStr the header text to parse
 * @return the parsed header
 */
private VCFHeader createHeader(String headerStr) {
    final VCFCodec vcfCodec = new VCFCodec();

    // wrap the text so the codec can consume it line by line
    final StringReader headerText = new StringReader(headerStr);
    final SynchronousLineReader lineReader = new SynchronousLineReader(headerText);
    final LineIteratorImpl headerLines = new LineIteratorImpl(lineReader);

    final VCFHeader parsedHeader = (VCFHeader) vcfCodec.readActualHeader(headerLines);
    Assert.assertEquals(parsedHeader.getMetaDataInInputOrder().size(), VCF4headerStringCount);
    return parsedHeader;
}
/**
 * Adds a plain key/value line to the HiSeq test header and verifies that it is reported among
 * the "other" lines and the full metadata set, can be looked up by key, and is absent from
 * the info, format, contig and filter categories.
 */
@Test
public void testVCFHeaderAddOtherLine() {
    final VCFHeader hiSeqHeader = getHiSeqVCFHeader();

    final String lineKey = "TestOtherLine";
    final VCFHeaderLine addedLine = new VCFHeaderLine(lineKey, "val");
    hiSeqHeader.addMetaDataLine(addedLine);

    // present where expected
    Assert.assertTrue(hiSeqHeader.getOtherHeaderLines().contains(addedLine),
            "TestOtherLine not found in other header lines");
    Assert.assertTrue(hiSeqHeader.getMetaDataInInputOrder().contains(addedLine),
            "TestOtherLine not found in set of all header lines");
    Assert.assertNotNull(hiSeqHeader.getOtherHeaderLine(lineKey),
            "Lookup for TestOtherLine by key failed");

    // absent from every other category
    Assert.assertFalse(hiSeqHeader.getInfoHeaderLines().contains(addedLine),
            "TestOtherLine present in info header lines");
    Assert.assertFalse(hiSeqHeader.getFormatHeaderLines().contains(addedLine),
            "TestOtherLine present in format header lines");
    Assert.assertFalse(hiSeqHeader.getContigLines().contains(addedLine),
            "TestOtherLine present in contig header lines");
    Assert.assertFalse(hiSeqHeader.getFilterLines().contains(addedLine),
            "TestOtherLine present in filter header lines");
}
/**
 * Adds an INFO line to the HiSeq test header and verifies that it is reported among the info
 * lines and the full metadata set, can be looked up by key, and is absent from the format,
 * filter, contig and "other" categories.
 */
@Test
public void testVCFHeaderAddInfoLine() {
    final VCFHeader hiSeqHeader = getHiSeqVCFHeader();

    final String lineId = "TestInfoLine";
    final VCFInfoHeaderLine addedLine = new VCFInfoHeaderLine(
            lineId, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "test info line");
    hiSeqHeader.addMetaDataLine(addedLine);

    // present where expected
    Assert.assertTrue(hiSeqHeader.getInfoHeaderLines().contains(addedLine),
            "TestInfoLine not found in info header lines");
    Assert.assertTrue(hiSeqHeader.getMetaDataInInputOrder().contains(addedLine),
            "TestInfoLine not found in set of all header lines");
    Assert.assertNotNull(hiSeqHeader.getInfoHeaderLine(lineId),
            "Lookup for TestInfoLine by key failed");

    // absent from every other category
    Assert.assertFalse(hiSeqHeader.getFormatHeaderLines().contains(addedLine),
            "TestInfoLine present in format header lines");
    Assert.assertFalse(hiSeqHeader.getFilterLines().contains(addedLine),
            "TestInfoLine present in filter header lines");
    Assert.assertFalse(hiSeqHeader.getContigLines().contains(addedLine),
            "TestInfoLine present in contig header lines");
    Assert.assertFalse(hiSeqHeader.getOtherHeaderLines().contains(addedLine),
            "TestInfoLine present in other header lines");
}
/**
 * Adds a FORMAT line to the HiSeq test header and verifies that it is reported among the
 * format lines and the full metadata set, can be looked up by key, and is absent from the
 * info, filter, contig and "other" categories.
 */
@Test
public void testVCFHeaderAddFormatLine() {
    final VCFHeader hiSeqHeader = getHiSeqVCFHeader();

    final String lineId = "TestFormatLine";
    final VCFFormatHeaderLine addedLine = new VCFFormatHeaderLine(
            lineId, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "test format line");
    hiSeqHeader.addMetaDataLine(addedLine);

    // present where expected
    Assert.assertTrue(hiSeqHeader.getFormatHeaderLines().contains(addedLine),
            "TestFormatLine not found in format header lines");
    Assert.assertTrue(hiSeqHeader.getMetaDataInInputOrder().contains(addedLine),
            "TestFormatLine not found in set of all header lines");
    Assert.assertNotNull(hiSeqHeader.getFormatHeaderLine(lineId),
            "Lookup for TestFormatLine by key failed");

    // absent from every other category
    Assert.assertFalse(hiSeqHeader.getInfoHeaderLines().contains(addedLine),
            "TestFormatLine present in info header lines");
    Assert.assertFalse(hiSeqHeader.getFilterLines().contains(addedLine),
            "TestFormatLine present in filter header lines");
    Assert.assertFalse(hiSeqHeader.getContigLines().contains(addedLine),
            "TestFormatLine present in contig header lines");
    Assert.assertFalse(hiSeqHeader.getOtherHeaderLines().contains(addedLine),
            "TestFormatLine present in other header lines");
}
/**
 * Creates a FILTER line, checks that it reports the description it was built with, adds it to
 * the HiSeq test header and verifies that it is reported among the filter lines and the full
 * metadata set, can be looked up by key, and is absent from the info, format, contig and
 * "other" categories.
 */
@Test
public void testVCFHeaderAddFilterLine() {
    final VCFHeader hiSeqHeader = getHiSeqVCFHeader();

    final String lineId = "TestFilterLine";
    final String expectedDescription = "TestFilterLine Description";
    final VCFFilterHeaderLine addedLine = new VCFFilterHeaderLine(lineId, expectedDescription);
    Assert.assertEquals(expectedDescription, addedLine.getDescription());

    hiSeqHeader.addMetaDataLine(addedLine);

    // present where expected
    Assert.assertTrue(hiSeqHeader.getFilterLines().contains(addedLine),
            "TestFilterLine not found in filter header lines");
    Assert.assertTrue(hiSeqHeader.getMetaDataInInputOrder().contains(addedLine),
            "TestFilterLine not found in set of all header lines");
    Assert.assertNotNull(hiSeqHeader.getFilterHeaderLine(lineId),
            "Lookup for TestFilterLine by key failed");

    // absent from every other category
    Assert.assertFalse(hiSeqHeader.getInfoHeaderLines().contains(addedLine),
            "TestFilterLine present in info header lines");
    Assert.assertFalse(hiSeqHeader.getFormatHeaderLines().contains(addedLine),
            "TestFilterLine present in format header lines");
    Assert.assertFalse(hiSeqHeader.getContigLines().contains(addedLine),
            "TestFilterLine present in contig header lines");
    Assert.assertFalse(hiSeqHeader.getOtherHeaderLines().contains(addedLine),
            "TestFilterLine present in other header lines");
}
@Test(enabled = true) public void testGATKVersionInVCF() throws Exception { WalkerTestSpec spec = new WalkerTestSpec("-T TestPrintVariantsWalker -R " + b37KGReference + " -V " + privateTestDir + "NA12878.WGS.b37.chr20.firstMB.vcf" + " -o %s -L 20:61098", 1, Arrays.asList("")); spec.disableShadowBCF(); final File vcf = executeTest("testGATKVersionInVCF", spec).first.get(0); final VCFCodec codec = new VCFCodec(); final VCFHeader header = (VCFHeader) codec.readActualHeader(codec.makeSourceFromStream(new FileInputStream(vcf))); // go through the metadata headers and look for ones that start with the GATK_COMMAND_LINE_KEY VCFHeaderLine versionLine = null; for ( final VCFHeaderLine headerLine : header.getMetaDataInInputOrder()) { if(headerLine.getKey().startsWith(GATKVCFUtils.GATK_COMMAND_LINE_KEY)) { versionLine = headerLine; break; } } Assert.assertNotNull(versionLine); Assert.assertTrue(versionLine.toString().contains("TestPrintVariantsWalker")); }
/**
 * Reads the header of the HiSeq test VCF, round-trips it through
 * {@code TestUtil.serializeAndDeserialize}, and checks that each accessor compared below
 * returns equal values on the original and on the round-tripped header.
 *
 * @throws Exception if reading the file or the serialization round trip fails
 */
@Test
public void testVCFHeaderSerialization() throws Exception {
    final File hiSeqVcf = new File("src/test/resources/htsjdk/variant/HiSeq.10000.vcf");
    final VCFFileReader vcfReader = new VCFFileReader(hiSeqVcf, false);
    final VCFHeader before = vcfReader.getFileHeader();
    vcfReader.close();

    final VCFHeader after = TestUtil.serializeAndDeserialize(before);

    // header lines, overall and per category
    Assert.assertEquals(after.getMetaDataInInputOrder(), before.getMetaDataInInputOrder(),
            "Header metadata does not match before/after serialization");
    Assert.assertEquals(after.getContigLines(), before.getContigLines(),
            "Contig header lines do not match before/after serialization");
    Assert.assertEquals(after.getFilterLines(), before.getFilterLines(),
            "Filter header lines do not match before/after serialization");
    Assert.assertEquals(after.getFormatHeaderLines(), before.getFormatHeaderLines(),
            "Format header lines do not match before/after serialization");
    Assert.assertEquals(after.getIDHeaderLines(), before.getIDHeaderLines(),
            "ID header lines do not match before/after serialization");
    Assert.assertEquals(after.getInfoHeaderLines(), before.getInfoHeaderLines(),
            "Info header lines do not match before/after serialization");
    Assert.assertEquals(after.getOtherHeaderLines(), before.getOtherHeaderLines(),
            "Other header lines do not match before/after serialization");

    // sample bookkeeping
    Assert.assertEquals(after.getGenotypeSamples(), before.getGenotypeSamples(),
            "Genotype samples not the same before/after serialization");
    Assert.assertEquals(after.samplesWereAlreadySorted(), before.samplesWereAlreadySorted(),
            "Sortedness of samples not the same before/after serialization");
    Assert.assertEquals(after.getSampleNamesInOrder(), before.getSampleNamesInOrder(),
            "Sorted list of sample names in header not the same before/after serialization");
    Assert.assertEquals(after.getSampleNameToOffset(), before.getSampleNameToOffset(),
            "Sample name to offset map not the same before/after serialization");

    // textual form
    Assert.assertEquals(after.toString(), before.toString(),
            "String representation of header not the same before/after serialization");
}