/**
 * Wraps a key/value pair in a new {@link VCFHeaderLine}.
 *
 * @param key   the header line key; it is dereferenced, so {@code null} raises a NullPointerException
 * @param value the header line value; it is dereferenced, so {@code null} raises a NullPointerException
 * @return a new header line built from the key and value
 */
private VCFHeaderLine convertTo(String key, String value) {
    // toString() on a String is an identity call; it is kept so that null inputs fail here.
    final String headerKey = key.toString();
    final String headerValue = value.toString();
    return new VCFHeaderLine(headerKey, headerValue);
}
/**
 * Builds the metadata set handed out to callers: the VCF 4.2 version line
 * followed by the supplied header lines, in the iteration order of the input.
 *
 * @param headerLinesInSomeOrder the header lines to place after the version line
 * @return an unmodifiable, insertion-ordered set starting with the version line
 */
private static Set<VCFHeaderLine> makeGetMetaDataSet(final Set<VCFHeaderLine> headerLinesInSomeOrder) {
    final VCFHeaderVersion version = VCFHeaderVersion.VCF4_2;
    final VCFHeaderLine versionLine = new VCFHeaderLine(version.getFormatString(), version.getVersionString());

    // LinkedHashSet keeps insertion order, so the version line stays first.
    final Set<VCFHeaderLine> ordered = new LinkedHashSet<VCFHeaderLine>();
    ordered.add(versionLine);
    ordered.addAll(headerLinesInSomeOrder);

    return Collections.unmodifiableSet(ordered);
}
/**
 * Returns a read-only view of the header lines with the VCF 4.2 version
 * line inserted ahead of them.
 *
 * @param headerLinesInSomeOrder header lines to append after the version line
 * @return an unmodifiable set whose first element is the version line
 */
private static Set<VCFHeaderLine> makeGetMetaDataSet(final Set<VCFHeaderLine> headerLinesInSomeOrder) {
    final String formatString = VCFHeaderVersion.VCF4_2.getFormatString();
    final String versionString = VCFHeaderVersion.VCF4_2.getVersionString();

    // Insertion order is preserved, which pins the version line to the front.
    final Set<VCFHeaderLine> withVersion = new LinkedHashSet<VCFHeaderLine>();
    withVersion.add(new VCFHeaderLine(formatString, versionString));
    withVersion.addAll(headerLinesInSomeOrder);
    return Collections.unmodifiableSet(withVersion);
}
/**
 * Builds the metadata set handed out to callers: the VCF 4.1 version line
 * followed by the supplied header lines, in the iteration order of the input.
 *
 * @param headerLinesInSomeOrder the header lines to place after the version line
 * @return an unmodifiable, insertion-ordered set starting with the version line
 */
private static Set<VCFHeaderLine> makeGetMetaDataSet(final Set<VCFHeaderLine> headerLinesInSomeOrder) {
    final VCFHeaderVersion version = VCFHeaderVersion.VCF4_1;
    final VCFHeaderLine versionLine = new VCFHeaderLine(version.getFormatString(), version.getVersionString());

    // LinkedHashSet keeps insertion order, so the version line stays first.
    final Set<VCFHeaderLine> ordered = new LinkedHashSet<VCFHeaderLine>();
    ordered.add(versionLine);
    ordered.addAll(headerLinesInSomeOrder);

    return Collections.unmodifiableSet(ordered);
}
public static Set<VCFHeaderLine> withUpdatedContigsAsLines(final Set<VCFHeaderLine> oldLines, final File referenceFile, final SAMSequenceDictionary refDict, boolean referenceNameOnly) { final Set<VCFHeaderLine> lines = new LinkedHashSet<VCFHeaderLine>(oldLines.size()); for ( final VCFHeaderLine line : oldLines ) { if ( line instanceof VCFContigHeaderLine ) continue; // skip old contig lines if ( line.getKey().equals(VCFHeader.REFERENCE_KEY) ) continue; // skip the old reference key lines.add(line); } for ( final VCFHeaderLine contigLine : makeContigHeaderLines(refDict, referenceFile) ) lines.add(contigLine); String referenceValue; if (referenceFile != null) { if (referenceNameOnly) { int extensionStart = referenceFile.getName().lastIndexOf("."); referenceValue = extensionStart == -1 ? referenceFile.getName() : referenceFile.getName().substring(0, extensionStart); } else { referenceValue = "file://" + referenceFile.getAbsolutePath(); } lines.add(new VCFHeaderLine(VCFHeader.REFERENCE_KEY, referenceValue)); } return lines; }
public static Set<VCFHeaderLine> withUpdatedContigsAsLines(final Set<VCFHeaderLine> oldLines, final File referenceFile, final SAMSequenceDictionary refDict, final boolean referenceNameOnly) { final Set<VCFHeaderLine> lines = new LinkedHashSet<>(oldLines.size()); for (final VCFHeaderLine line : oldLines) { if (line instanceof VCFContigHeaderLine) continue; // skip old contig lines if (line.getKey().equals(VCFHeader.REFERENCE_KEY)) continue; // skip the old reference key lines.add(line); } for (final VCFHeaderLine contigLine : makeContigHeaderLines(refDict, referenceFile)) lines.add(contigLine); final String referenceValue; if (referenceFile != null) { if (referenceNameOnly) { final int extensionStart = referenceFile.getName().lastIndexOf('.'); referenceValue = extensionStart == -1 ? referenceFile.getName() : referenceFile.getName().substring(0, extensionStart); } else { referenceValue = "file://" + referenceFile.getAbsolutePath(); } lines.add(new VCFHeaderLine(VCFHeader.REFERENCE_KEY, referenceValue)); } return lines; }
public static Set<VCFHeaderLine> withUpdatedContigsAsLines(final Set<VCFHeaderLine> oldLines, final File referenceFile, final SAMSequenceDictionary refDict, final boolean referenceNameOnly) { final Set<VCFHeaderLine> lines = new LinkedHashSet<>(oldLines.size()); for (final VCFHeaderLine line : oldLines) { if (line instanceof VCFContigHeaderLine) continue; // skip old contig lines if (line.getKey().equals(VCFHeader.REFERENCE_KEY)) continue; // skip the old reference key lines.add(line); } for (final VCFHeaderLine contigLine : makeContigHeaderLines(refDict, referenceFile)) lines.add(contigLine); final String referenceValue; if (referenceFile != null) { if (referenceNameOnly) { final int extensionStart = referenceFile.getName().lastIndexOf('.'); referenceValue = extensionStart == -1 ? referenceFile.getName() : referenceFile.getName().substring(0, extensionStart); } else { referenceValue = "file://" + referenceFile.getAbsolutePath(); } lines.add(new VCFHeaderLine(VCFHeader.REFERENCE_KEY, referenceValue)); } return lines; }
/**
 * Builds a {@link VariantContextWriter} for {@code outputFile} and writes a
 * single-sample VCF header to it before returning it.
 *
 * @param outputFile                the file the writer is set up to write to
 * @param referenceSequenceFileName the reference file whose absolute path goes in the "reference" header line
 * @param sample                    the only sample name in the header
 * @param source                    the value of the "source" header line
 * @param ref                       the reference whose sequence dictionary is given to the writer and the header
 * @return the writer, with its header already written; closing it is left to the caller
 */
private static VariantContextWriter getVariantContextWriter(final File outputFile, final File referenceSequenceFileName, final String sample, final String source, final ReferenceSequenceFile ref) {
    final VariantContextWriter writer = new VariantContextWriterBuilder()
            .setReferenceDictionary(ref.getSequenceDictionary())
            .setOutputFile(outputFile)
            .build();

    // Free-form metadata lines first, then the standard FORMAT definitions.
    final Set<VCFHeaderLine> headerLines = new LinkedHashSet<>();
    headerLines.add(new VCFHeaderLine("reference", referenceSequenceFileName.getAbsolutePath()));
    headerLines.add(new VCFHeaderLine("source", source));
    headerLines.add(new VCFHeaderLine("fileDate", new Date().toString()));
    headerLines.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_PL_KEY));
    headerLines.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_ALLELE_DEPTHS));
    headerLines.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.DEPTH_KEY));

    final VCFHeader vcfHeader = new VCFHeader(headerLines, Collections.singletonList(sample));
    vcfHeader.setSequenceDictionary(ref.getSequenceDictionary());

    writer.writeHeader(vcfHeader);
    return writer;
}
public VCFHeaderLine toVCFHeaderLine() { // Need to uniquify the key for the header line using the min/max GQ, since // VCFHeader does not allow lines with duplicate keys. final String key = String.format("GVCFBlock%d-%d", getGQLowerBound(), getGQUpperBound()); return new VCFHeaderLine(key, "minGQ=" + getGQLowerBound() + "(inclusive),maxGQ=" + getGQUpperBound() + "(exclusive)"); }
/**
 * Creates a writer for {@code outputFile}, writes a one-sample VCF header
 * to it, and hands the open writer back.
 *
 * @param outputFile                the file the writer is set up to write to
 * @param referenceSequenceFileName the reference file whose absolute path goes in the "reference" header line
 * @param sample                    the only sample name in the header
 * @param source                    the value of the "source" header line
 * @param ref                       the reference whose sequence dictionary is given to the writer and the header
 * @return the writer, with its header already written; closing it is left to the caller
 */
private static VariantContextWriter getVariantContextWriter(final File outputFile, final File referenceSequenceFileName, final String sample, final String source, final ReferenceSequenceFile ref) {
    final VariantContextWriterBuilder builder = new VariantContextWriterBuilder()
            .setReferenceDictionary(ref.getSequenceDictionary())
            .setOutputFile(outputFile);
    final VariantContextWriter vcfWriter = builder.build();

    final Set<VCFHeaderLine> metaData = new LinkedHashSet<>();

    // Free-form metadata lines.
    metaData.add(new VCFHeaderLine("reference", referenceSequenceFileName.getAbsolutePath()));
    metaData.add(new VCFHeaderLine("source", source));
    metaData.add(new VCFHeaderLine("fileDate", new Date().toString()));

    // Standard FORMAT field definitions.
    metaData.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_PL_KEY));
    metaData.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_ALLELE_DEPTHS));
    metaData.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.DEPTH_KEY));

    final VCFHeader singleSampleHeader = new VCFHeader(metaData, Collections.singletonList(sample));
    singleSampleHeader.setSequenceDictionary(ref.getSequenceDictionary());
    vcfWriter.writeHeader(singleSampleHeader);

    return vcfWriter;
}
/** * Parse the familial relationship specification, build the transmission matrices and initialize VCF writer */ public void initialize() { ArrayList<String> rodNames = new ArrayList<String>(); rodNames.add(variantCollection.variants.getName()); Map<String, VCFHeader> vcfRods = GATKVCFUtils.getVCFHeadersFromRods(getToolkit(), rodNames); Set<String> vcfSamples = SampleUtils.getSampleList(vcfRods, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE); //Get the trios from the families passed as ped setTrios(vcfSamples); if(trios.size()<1) throw new UserException.BadInput("No PED file passed or no *non-skipped* trios found in PED file. Aborted."); Set<VCFHeaderLine> headerLines = new HashSet<VCFHeaderLine>(); headerLines.addAll(GATKVCFUtils.getHeaderFields(this.getToolkit())); headerLines.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.TRANSMISSION_PROBABILITY_KEY)); headerLines.add(new VCFHeaderLine("source", SOURCE_NAME)); vcfWriter.writeHeader(new VCFHeader(headerLines, vcfSamples)); buildMatrices(); if(mvFile != null) mvFile.println("CHROM\tPOS\tAC\tFAMILY\tTP\tMOTHER_GT\tMOTHER_DP\tMOTHER_AD\tMOTHER_PL\tFATHER_GT\tFATHER_DP\tFATHER_AD\tFATHER_PL\tCHILD_GT\tCHILD_DP\tCHILD_AD\tCHILD_PL"); }
/**
 * Creates a fake header of known content.
 *
 * Both sets passed in are modified: a "two"="2" line is added to
 * {@code metaData}, and "extra1" and "extra2" are added to
 * {@code additionalColumns}.
 *
 * @param metaData          the header lines; modified in place
 * @param additionalColumns the additional column names; modified in place
 * @param sequenceDict      the sequence dictionary set on the returned header
 * @return a fake VCF header
 */
public static VCFHeader createFakeHeader(final Set<VCFHeaderLine> metaData, final Set<String> additionalColumns, final SAMSequenceDictionary sequenceDict) {
    final VCFHeaderLine fixedLine = new VCFHeaderLine("two", "2");
    metaData.add(fixedLine);

    for (final String extraColumn : new String[] {"extra1", "extra2"}) {
        additionalColumns.add(extraColumn);
    }

    final VCFHeader fakeHeader = new VCFHeader(metaData, additionalColumns);
    fakeHeader.setSequenceDictionary(sequenceDict);
    return fakeHeader;
}
/**
 * Creates a fake header of known content.
 *
 * Both sets passed in are modified: a VCF 4.0 version line and a
 * "two"="2" line are added to {@code metaData}, and "extra1" and "extra2"
 * are added to {@code additionalColumns}.
 *
 * @param metaData          the header lines; modified in place
 * @param additionalColumns the additional column names; modified in place
 * @param sequenceDict      the sequence dictionary set on the returned header
 * @return a fake VCF header
 */
private static VCFHeader createFakeHeader(final Set<VCFHeaderLine> metaData, final Set<String> additionalColumns, final SAMSequenceDictionary sequenceDict) {
    final String formatString = VCFHeaderVersion.VCF4_0.getFormatString();
    final String versionString = VCFHeaderVersion.VCF4_0.getVersionString();
    metaData.add(new VCFHeaderLine(formatString, versionString));
    metaData.add(new VCFHeaderLine("two", "2"));

    for (final String extraColumn : new String[] {"extra1", "extra2"}) {
        additionalColumns.add(extraColumn);
    }

    final VCFHeader fakeHeader = new VCFHeader(metaData, additionalColumns);
    fakeHeader.setSequenceDictionary(sequenceDict);
    return fakeHeader;
}
/**
 * Builds a header from two filter, two contig, two info, three other and
 * two format lines, and checks that {@code BCF2Utils.makeDictionary}
 * returns a dictionary of seven entries for it.
 */
@Test
public void testCreateDictionary() {
    final List<VCFHeaderLine> inputLines = new ArrayList<VCFHeaderLine>();
    // A running counter gives every ID-bearing line a distinct ID.
    int counter = 0;
    inputLines.add(new VCFFilterHeaderLine(String.valueOf(counter++)));
    inputLines.add(new VCFFilterHeaderLine(String.valueOf(counter++)));
    inputLines.add(new VCFContigHeaderLine(Collections.singletonMap("ID", String.valueOf(counter++)), counter));
    inputLines.add(new VCFContigHeaderLine(Collections.singletonMap("ID", String.valueOf(counter++)), counter));
    inputLines.add(new VCFInfoHeaderLine(String.valueOf(counter++), VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "x"));
    inputLines.add(new VCFInfoHeaderLine(String.valueOf(counter++), VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "x"));
    inputLines.add(new VCFHeaderLine("x", "misc"));
    inputLines.add(new VCFHeaderLine("y", "misc"));
    inputLines.add(new VCFSimpleHeaderLine("GATKCommandLine","z","misc"));
    inputLines.add(new VCFFormatHeaderLine(String.valueOf(counter++), VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "x"));
    inputLines.add(new VCFFormatHeaderLine(String.valueOf(counter++), VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "x"));

    final VCFHeader inputHeader = new VCFHeader(new LinkedHashSet<VCFHeaderLine>(inputLines));
    final ArrayList<String> dict = BCF2Utils.makeDictionary(inputHeader);

    // Actual value first, expected second, so a failure message labels them correctly
    // (assumes Assert is TestNG's, as the message-last assertions elsewhere suggest).
    Assert.assertEquals(dict.size(), 7);
}
/**
 * Adds a plain (non-structured) header line to the HiSeq test header and
 * checks that it shows up among the "other" lines and in the full metadata,
 * can be looked up by key, and is absent from the info, format, contig and
 * filter line collections.
 */
@Test
public void testVCFHeaderAddOtherLine() {
    final VCFHeader hiSeqHeader = getHiSeqVCFHeader();
    final VCFHeaderLine addedLine = new VCFHeaderLine("TestOtherLine", "val");
    hiSeqHeader.addMetaDataLine(addedLine);

    // The line must be visible everywhere an "other" line is expected...
    final boolean inOtherLines = hiSeqHeader.getOtherHeaderLines().contains(addedLine);
    Assert.assertTrue(inOtherLines, "TestOtherLine not found in other header lines");
    final boolean inAllLines = hiSeqHeader.getMetaDataInInputOrder().contains(addedLine);
    Assert.assertTrue(inAllLines, "TestOtherLine not found in set of all header lines");
    Assert.assertNotNull(hiSeqHeader.getOtherHeaderLine("TestOtherLine"), "Lookup for TestOtherLine by key failed");

    // ...and must not leak into any of the typed line collections.
    final boolean inInfoLines = hiSeqHeader.getInfoHeaderLines().contains(addedLine);
    Assert.assertFalse(inInfoLines, "TestOtherLine present in info header lines");
    final boolean inFormatLines = hiSeqHeader.getFormatHeaderLines().contains(addedLine);
    Assert.assertFalse(inFormatLines, "TestOtherLine present in format header lines");
    final boolean inContigLines = hiSeqHeader.getContigLines().contains(addedLine);
    Assert.assertFalse(inContigLines, "TestOtherLine present in contig header lines");
    final boolean inFilterLines = hiSeqHeader.getFilterLines().contains(addedLine);
    Assert.assertFalse(inFilterLines, "TestOtherLine present in filter header lines");
}
@Test public void testVCFHeaderAddDuplicateHeaderLine() { File input = new File("src/test/resources/htsjdk/variant/ex2.vcf"); VCFFileReader reader = new VCFFileReader(input, false); VCFHeader header = reader.getFileHeader(); VCFHeaderLine newHeaderLine = new VCFHeaderLine("key", "value"); // add this new header line header.addMetaDataLine(newHeaderLine); final int numHeaderLinesBefore = header.getOtherHeaderLines().size(); // readd the same header line header.addMetaDataLine(newHeaderLine); final int numHeaderLinesAfter = header.getOtherHeaderLines().size(); // assert that we have the same number of other header lines before and after Assert.assertEquals(numHeaderLinesBefore, numHeaderLinesAfter); }
public void writeAsVcf(final File output, final File refFile) throws FileNotFoundException { ReferenceSequenceFile ref = new IndexedFastaSequenceFile(refFile); try (VariantContextWriter writer = new VariantContextWriterBuilder() .setOutputFile(output) .setReferenceDictionary(ref.getSequenceDictionary()) .build()) { final VCFHeader vcfHeader = new VCFHeader( VCFUtils.withUpdatedContigsAsLines(Collections.emptySet(), refFile, header.getSequenceDictionary(), false), Collections.singleton(HET_GENOTYPE_FOR_PHASING)); VCFUtils.withUpdatedContigsAsLines(Collections.emptySet(), refFile, header.getSequenceDictionary(), false); vcfHeader.addMetaDataLine(new VCFHeaderLine(VCFHeaderVersion.VCF4_2.getFormatString(), VCFHeaderVersion.VCF4_2.getVersionString())); vcfHeader.addMetaDataLine(new VCFInfoHeaderLine(VCFConstants.ALLELE_FREQUENCY_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Allele Frequency, for each ALT allele, in the same order as listed")); vcfHeader.addMetaDataLine(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype")); vcfHeader.addMetaDataLine(new VCFFormatHeaderLine(VCFConstants.PHASE_SET_KEY, 1, VCFHeaderLineType.String, "Phase-set identifier for phased genotypes.")); vcfHeader.addMetaDataLine(new VCFHeaderLine(VCFHeader.SOURCE_KEY,"HaplotypeMap::writeAsVcf")); vcfHeader.addMetaDataLine(new VCFHeaderLine("reference","HaplotypeMap::writeAsVcf")); // vcfHeader.addMetaDataLine(new VCFHeaderLine()); writer.writeHeader(vcfHeader); final LinkedList<VariantContext> variants = new LinkedList<>(this.asVcf(ref)); variants.sort(vcfHeader.getVCFRecordComparator()); variants.forEach(writer::add); } }
public void writeAsVcf(final File output, final File refFile) throws FileNotFoundException { ReferenceSequenceFile ref = new IndexedFastaSequenceFile(refFile); try (VariantContextWriter writer = new VariantContextWriterBuilder() .setOutputFile(output) .setReferenceDictionary(ref.getSequenceDictionary()) .build()) { final VCFHeader vcfHeader = new VCFHeader( VCFUtils.withUpdatedContigsAsLines(Collections.emptySet(), refFile, header.getSequenceDictionary(), false), Collections.singleton(HET_GENOTYPE_FOR_PHASING)); VCFUtils.withUpdatedContigsAsLines(Collections.emptySet(), refFile, header.getSequenceDictionary(), false); vcfHeader.addMetaDataLine(new VCFHeaderLine(VCFHeaderVersion.VCF4_2.getFormatString(), VCFHeaderVersion.VCF4_2.getVersionString())); vcfHeader.addMetaDataLine(new VCFInfoHeaderLine(VCFConstants.ALLELE_FREQUENCY_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Allele Frequency, for each ALT allele, in the same order as listed")); vcfHeader.addMetaDataLine(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype")); vcfHeader.addMetaDataLine(new VCFFormatHeaderLine(VCFConstants.PHASE_SET_KEY, 1, VCFHeaderLineType.String, "Phase-set identifier for phased genotypes.")); vcfHeader.addMetaDataLine(new VCFHeaderLine(VCFHeader.SOURCE_KEY,"HaplotypeMap::writeAsVcf")); vcfHeader.addMetaDataLine(new VCFHeaderLine("reference","HaplotypeMap::writeAsVcf")); // vcfHeader.addMetaDataLine(new VCFHeaderLine()); writer.writeHeader(vcfHeader); final LinkedList<VariantContext> variants = new LinkedList<>(this.asVcf(ref)); variants.sort(vcfHeader.getVCFRecordComparator()); variants.forEach(writer::add); } }
/**
 * Creates a fake header of known content: one "two"="2" metadata line, the
 * extra columns "extra1" and "extra2", a DP info line, GT/BB/GQ format
 * lines, and an artificial sequence dictionary.
 *
 * @return a fake VCF header
 */
private static VCFHeader createFakeHeader() {
    final SAMSequenceDictionary artificialDict = createArtificialSequenceDictionary();

    final Set<VCFHeaderLine> initialLines = new HashSet<>();
    final Set<String> extraColumns = new HashSet<>();
    initialLines.add(new VCFHeaderLine("two", "2"));
    extraColumns.add("extra1");
    extraColumns.add("extra2");

    final VCFHeader fakeHeader = new VCFHeader(initialLines, extraColumns);

    // One INFO definition, then the FORMAT definitions.
    fakeHeader.addMetaDataLine(new VCFInfoHeaderLine("DP", 1, VCFHeaderLineType.String, "x"));
    for (final String formatId : new String[] {"GT", "BB", "GQ"}) {
        fakeHeader.addMetaDataLine(new VCFFormatHeaderLine(formatId, 1, VCFHeaderLineType.String, "x"));
    }

    fakeHeader.setSequenceDictionary(artificialDict);
    return fakeHeader;
}
/**
 * A test to ensure that if we add a line to a VCFHeader it will persist through
 * a round-trip write/read cycle via VariantContextWriter/VCFFileReader.
 *
 * Readers and the writer are opened in try-with-resources so they are
 * closed even when a step in between throws.
 */
@Test
public void testModifyHeader() {
    final File originalVCF = new File("src/test/resources/htsjdk/variant/HiSeq.10000.vcf");

    // Only the header is needed from the original file.
    final VCFHeader header;
    try (VCFFileReader reader = new VCFFileReader(originalVCF, false)) {
        header = reader.getFileHeader();
    }

    header.addMetaDataLine(new VCFHeaderLine("FOOBAR", "foovalue"));

    final File outputVCF = createTempFile("testModifyHeader", IOUtil.VCF_FILE_EXTENSION);

    // Write a header-only VCF; the writer must be closed before the file is read back.
    try (VariantContextWriter writer = new VariantContextWriterBuilder()
            .setOutputFile(outputVCF)
            .setOptions(EnumSet.of(Options.ALLOW_MISSING_FIELDS_IN_HEADER))
            .build()) {
        writer.writeHeader(header);
    }

    final VCFHeader roundtripHeader;
    try (VCFFileReader roundtripReader = new VCFFileReader(outputVCF, false)) {
        roundtripHeader = roundtripReader.getFileHeader();
    }

    Assert.assertNotNull(roundtripHeader.getOtherHeaderLine("FOOBAR"), "Could not find FOOBAR header line after a write/read cycle");
    Assert.assertEquals(roundtripHeader.getOtherHeaderLine("FOOBAR").getValue(), "foovalue", "Wrong value for FOOBAR header line after a write/read cycle");
}