/**
 * Convenience overload that updates the contig and reference lines of a set of header lines.
 *
 * Delegates to the four-argument overload with {@code referenceNameOnly} set to {@code false}.
 *
 * @param oldLines      the header lines to update
 * @param referenceFile the reference file, forwarded unchanged to the four-argument overload
 * @param refDict       the reference sequence dictionary, forwarded unchanged
 * @return whatever the four-argument overload returns for these arguments
 */
public static Set<VCFHeaderLine> withUpdatedContigsAsLines(final Set<VCFHeaderLine> oldLines,
                                                           final File referenceFile,
                                                           final SAMSequenceDictionary refDict) {
    final boolean referenceNameOnly = false;
    return withUpdatedContigsAsLines(oldLines, referenceFile, refDict, referenceNameOnly);
}
/**
 * Builds one contig header line for every sequence in the given reference dictionary.
 *
 * @param refDict       reference dictionary whose sequences become contig lines
 * @param referenceFile used only for its file name, from which the assembly is looked up; may be null,
 *                      in which case a null assembly is passed for every contig
 * @return list of vcf contig header lines, in the order of {@code refDict.getSequences()}
 */
public static List<VCFContigHeaderLine> makeContigHeaderLines(final SAMSequenceDictionary refDict,
                                                              final File referenceFile) {
    // The assembly is resolved once from the file name and shared by all contig lines.
    final String assembly;
    if ( referenceFile == null ) {
        assembly = null;
    } else {
        assembly = getReferenceAssembly(referenceFile.getName());
    }

    final List<VCFContigHeaderLine> contigLines = new ArrayList<VCFContigHeaderLine>();
    for ( final SAMSequenceRecord sequence : refDict.getSequences() ) {
        contigLines.add(makeContigHeaderLine(sequence, assembly));
    }
    return contigLines;
}
public static Set<VCFHeaderLine> withUpdatedContigsAsLines(final Set<VCFHeaderLine> oldLines, final File referenceFile, final SAMSequenceDictionary refDict, boolean referenceNameOnly) { final Set<VCFHeaderLine> lines = new LinkedHashSet<VCFHeaderLine>(oldLines.size()); for ( final VCFHeaderLine line : oldLines ) { if ( line instanceof VCFContigHeaderLine ) continue; // skip old contig lines if ( line.getKey().equals(VCFHeader.REFERENCE_KEY) ) continue; // skip the old reference key lines.add(line); } for ( final VCFHeaderLine contigLine : makeContigHeaderLines(refDict, referenceFile) ) lines.add(contigLine); String referenceValue; if (referenceFile != null) { if (referenceNameOnly) { int extensionStart = referenceFile.getName().lastIndexOf("."); referenceValue = extensionStart == -1 ? referenceFile.getName() : referenceFile.getName().substring(0, extensionStart); } else { referenceValue = "file://" + referenceFile.getAbsolutePath(); } lines.add(new VCFHeaderLine(VCFHeader.REFERENCE_KEY, referenceValue)); } return lines; }
/**
 * Merges the headers built from {@code VCF4headerStrings} and {@code VCF4headerStringsSmallSubset}
 * and checks that the number of merged lines equals {@code VCF4headerStringCount}.
 */
@Test
public void testHeadersWhereOneIsAStrictSubsetOfTheOther() {
    final ArrayList<VCFHeader> toMerge = new ArrayList<VCFHeader>();
    toMerge.add(createHeader(VCF4headerStrings));
    toMerge.add(createHeader(VCF4headerStringsSmallSubset));

    final Set<VCFHeaderLine> merged = VCFUtils.smartMergeHeaders(toMerge, false);
    Assert.assertEquals(merged.size(), VCF4headerStringCount);
}
/**
 * Copies the input VCF into a temporary file, asking the writer to index it on the fly.
 * This is done so that index files do not need to be stored in the repo, and so that the copy
 * and its index end up in the same directory, which is typically required.
 *
 * @param vcfFile        the vcf file to copy and index; its absolute path must end with
 *                       {@code IOUtil.VCF_FILE_EXTENSION} or {@code IOUtil.COMPRESSED_VCF_FILE_EXTENSION}
 * @param tempFilePrefix prefix handed to {@code createTemporaryIndexedVcfFile} for the temporary file
 * @return the temporary vcf file (index file is created in same path)
 * @throws IllegalArgumentException if the input path ends with neither extension
 * @throws IOException declared by the signature; propagated from the calls made here
 */
public static File createTemporaryIndexedVcfFromInput(final File vcfFile, final String tempFilePrefix) throws IOException {
    final String inputPath = vcfFile.getAbsolutePath();

    // Plain extension is tried first, then the compressed one; the first match wins.
    String extension = null;
    for (final String candidate : new String[] {IOUtil.VCF_FILE_EXTENSION, IOUtil.COMPRESSED_VCF_FILE_EXTENSION}) {
        if (inputPath.endsWith(candidate)) {
            extension = candidate;
            break;
        }
    }
    if (extension == null) {
        throw new IllegalArgumentException("couldn't find a " + IOUtil.VCF_FILE_EXTENSION + " or " + IOUtil.COMPRESSED_VCF_FILE_EXTENSION + " ending for input file " + inputPath);
    }

    final File output = createTemporaryIndexedVcfFile(tempFilePrefix, extension);

    try (final VCFFileReader reader = new VCFFileReader(vcfFile, false);
         final VariantContextWriter writer = new VariantContextWriterBuilder()
                 .setReferenceDictionary(reader.getFileHeader().getSequenceDictionary())
                 .setOptions(EnumSet.of(Options.INDEX_ON_THE_FLY))
                 .setOutputFile(output)
                 .build()) {
        // header first, then every record in the reader's iteration order
        writer.writeHeader(reader.getFileHeader());
        for (final VariantContext variant : reader) {
            writer.add(variant);
        }
    }

    return output;
}
/**
 * Merges the headers built from {@code VCF4headerStrings} and {@code VCF4headerStringsBrokenInfo};
 * the test is declared to pass only if an {@link IllegalStateException} is thrown
 * (presumably by the merge itself).
 */
@Test(expectedExceptions=IllegalStateException.class)
public void testHeadersInfoDifferentValues() {
    final ArrayList<VCFHeader> toMerge = new ArrayList<VCFHeader>();
    toMerge.add(createHeader(VCF4headerStrings));
    toMerge.add(createHeader(VCF4headerStringsBrokenInfo));

    final Set<VCFHeaderLine> merged = VCFUtils.smartMergeHeaders(toMerge, false);
    // only reached if nothing above threw
    Assert.assertEquals(merged.size(), VCF4headerStringCount);
}
/**
 * Writes a temporary copy of the input VCF with on-the-fly indexing enabled, so that the copy and
 * its index live in the same location and no index file has to be stored in the repo.
 *
 * @param vcfFile        the vcf file to copy and index; its absolute path must end with
 *                       {@code IOUtil.VCF_FILE_EXTENSION} or {@code IOUtil.COMPRESSED_VCF_FILE_EXTENSION}
 * @param tempFilePrefix prefix passed on to {@code createTemporaryIndexedVcfFile}
 * @return File a vcf file (index file is created in same path)
 * @throws IllegalArgumentException if the input path ends with neither extension
 * @throws IOException declared by the signature; propagated from the calls made here
 */
public static File createTemporaryIndexedVcfFromInput(final File vcfFile, final String tempFilePrefix) throws IOException {
    final String sourcePath = vcfFile.getAbsolutePath();

    final String extension;
    if (sourcePath.endsWith(IOUtil.VCF_FILE_EXTENSION)) {
        extension = IOUtil.VCF_FILE_EXTENSION;
    } else if (sourcePath.endsWith(IOUtil.COMPRESSED_VCF_FILE_EXTENSION)) {
        extension = IOUtil.COMPRESSED_VCF_FILE_EXTENSION;
    } else {
        throw new IllegalArgumentException("couldn't find a " + IOUtil.VCF_FILE_EXTENSION + " or " + IOUtil.COMPRESSED_VCF_FILE_EXTENSION + " ending for input file " + sourcePath);
    }

    final File indexedCopy = createTemporaryIndexedVcfFile(tempFilePrefix, extension);

    try (final VCFFileReader source = new VCFFileReader(vcfFile, false);
         final VariantContextWriter sink = new VariantContextWriterBuilder()
                 .setReferenceDictionary(source.getFileHeader().getSequenceDictionary())
                 .setOptions(EnumSet.of(Options.INDEX_ON_THE_FLY))
                 .setOutputFile(indexedCopy)
                 .build()) {
        sink.writeHeader(source.getFileHeader());
        for (final VariantContext record : source) {
            sink.add(record);
        }
    }

    return indexedCopy;
}
/**
 * Merges the headers built from {@code VCF4headerStrings} and {@code VCF4headerStringsBrokenFormat}
 * and checks that the number of merged lines equals {@code VCF4headerStringCount}.
 */
@Test
public void testHeadersFormatDifferentValues() {
    final ArrayList<VCFHeader> toMerge = new ArrayList<VCFHeader>();
    toMerge.add(createHeader(VCF4headerStrings));
    toMerge.add(createHeader(VCF4headerStringsBrokenFormat));

    final Set<VCFHeaderLine> merged = VCFUtils.smartMergeHeaders(toMerge, false);
    Assert.assertEquals(merged.size(), VCF4headerStringCount);
}
}
/**
 * Updates the contig and reference lines of a set of header lines.
 *
 * This is shorthand for calling the four-argument overload with {@code referenceNameOnly}
 * set to {@code false}.
 *
 * @param oldLines      the header lines to update
 * @param referenceFile the reference file, passed through to the four-argument overload
 * @param refDict       the reference sequence dictionary, passed through as well
 * @return the result of the four-argument overload
 */
public static Set<VCFHeaderLine> withUpdatedContigsAsLines(final Set<VCFHeaderLine> oldLines,
                                                           final File referenceFile,
                                                           final SAMSequenceDictionary refDict) {
    final boolean referenceNameOnly = false;
    return withUpdatedContigsAsLines(oldLines, referenceFile, refDict, referenceNameOnly);
}
/**
 * Creates a contig header line for each entry of the reference dictionary, tagging each with the
 * assembly looked up from the reference file's name when a reference file is supplied.
 *
 * @param refDict       reference dictionary whose sequences become contig lines
 * @param referenceFile consulted only for its name, to look up the assembly; may be null, in which
 *                      case every contig line is built with a null assembly
 * @return list of vcf contig header lines, in the order of {@code refDict.getSequences()}
 */
public static List<VCFContigHeaderLine> makeContigHeaderLines(final SAMSequenceDictionary refDict,
                                                              final File referenceFile) {
    // Resolve the assembly a single time; it is the same for every contig.
    final String assembly;
    if (referenceFile == null) {
        assembly = null;
    } else {
        assembly = getReferenceAssembly(referenceFile.getName());
    }

    final List<VCFContigHeaderLine> contigLines = new ArrayList<>();
    for (final SAMSequenceRecord sequence : refDict.getSequences()) {
        contigLines.add(makeContigHeaderLine(sequence, assembly));
    }
    return contigLines;
}
public static Set<VCFHeaderLine> withUpdatedContigsAsLines(final Set<VCFHeaderLine> oldLines, final File referenceFile, final SAMSequenceDictionary refDict, final boolean referenceNameOnly) { final Set<VCFHeaderLine> lines = new LinkedHashSet<>(oldLines.size()); for (final VCFHeaderLine line : oldLines) { if (line instanceof VCFContigHeaderLine) continue; // skip old contig lines if (line.getKey().equals(VCFHeader.REFERENCE_KEY)) continue; // skip the old reference key lines.add(line); } for (final VCFHeaderLine contigLine : makeContigHeaderLines(refDict, referenceFile)) lines.add(contigLine); final String referenceValue; if (referenceFile != null) { if (referenceNameOnly) { final int extensionStart = referenceFile.getName().lastIndexOf('.'); referenceValue = extensionStart == -1 ? referenceFile.getName() : referenceFile.getName().substring(0, extensionStart); } else { referenceValue = "file://" + referenceFile.getAbsolutePath(); } lines.add(new VCFHeaderLine(VCFHeader.REFERENCE_KEY, referenceValue)); } return lines; }
// NOTE(review): the enclosing method begins outside this view; the leading assignment to SET_KEY may
// be the body of a conditional on a preceding line -- confirm before relying on it being unconditional.
// The remaining statements merge the header lines of every header in vcfRods and, when SET_KEY is
// non-null, add an INFO header line keyed by SET_KEY (count 1, type String).
SET_KEY = null; Set<VCFHeaderLine> headerLines = VCFUtils.smartMergeHeaders(vcfRods.values(), true); if ( SET_KEY != null ) headerLines.add(new VCFInfoHeaderLine(SET_KEY, 1, VCFHeaderLineType.String, "Source VCF for the merged record in CombineVariants"));
/**
 * Three-argument form of the contig/reference line update.
 *
 * Forwards to the four-argument overload, passing {@code false} for {@code referenceNameOnly}.
 *
 * @param oldLines      the header lines to update
 * @param referenceFile the reference file, handed on to the four-argument overload
 * @param refDict       the reference sequence dictionary, handed on as well
 * @return the value returned by the four-argument overload
 */
public static Set<VCFHeaderLine> withUpdatedContigsAsLines(final Set<VCFHeaderLine> oldLines,
                                                           final File referenceFile,
                                                           final SAMSequenceDictionary refDict) {
    final boolean referenceNameOnly = false;
    return withUpdatedContigsAsLines(oldLines, referenceFile, refDict, referenceNameOnly);
}
/**
 * Turns every sequence record of the reference dictionary into a VCF contig header line.
 *
 * @param refDict       reference dictionary whose sequences become contig lines
 * @param referenceFile its name is used to look up the assembly; may be null, in which case a null
 *                      assembly is passed along for every contig
 * @return list of vcf contig header lines, in the order of {@code refDict.getSequences()}
 */
public static List<VCFContigHeaderLine> makeContigHeaderLines(final SAMSequenceDictionary refDict,
                                                              final File referenceFile) {
    // One assembly lookup serves all contigs.
    final String assembly;
    if (referenceFile == null) {
        assembly = null;
    } else {
        assembly = getReferenceAssembly(referenceFile.getName());
    }

    final List<VCFContigHeaderLine> result = new ArrayList<>();
    for (final SAMSequenceRecord record : refDict.getSequences()) {
        result.add(makeContigHeaderLine(record, assembly));
    }
    return result;
}
public static Set<VCFHeaderLine> withUpdatedContigsAsLines(final Set<VCFHeaderLine> oldLines, final File referenceFile, final SAMSequenceDictionary refDict, final boolean referenceNameOnly) { final Set<VCFHeaderLine> lines = new LinkedHashSet<>(oldLines.size()); for (final VCFHeaderLine line : oldLines) { if (line instanceof VCFContigHeaderLine) continue; // skip old contig lines if (line.getKey().equals(VCFHeader.REFERENCE_KEY)) continue; // skip the old reference key lines.add(line); } for (final VCFHeaderLine contigLine : makeContigHeaderLines(refDict, referenceFile)) lines.add(contigLine); final String referenceValue; if (referenceFile != null) { if (referenceNameOnly) { final int extensionStart = referenceFile.getName().lastIndexOf('.'); referenceValue = extensionStart == -1 ? referenceFile.getName() : referenceFile.getName().substring(0, extensionStart); } else { referenceValue = "file://" + referenceFile.getAbsolutePath(); } lines.add(new VCFHeaderLine(VCFHeader.REFERENCE_KEY, referenceValue)); } return lines; }
// Merge the header lines of every header held in vcfRods into one set.
// NOTE(review): the meaning of the second argument (true) is not visible here -- confirm against smartMergeHeaders.
Set<VCFHeaderLine> headerLines = VCFUtils.smartMergeHeaders(vcfRods.values(), true);
/**
 * Builds a new VCFHeader whose contig header lines are added / replaced using the reference file
 * and the master reference dictionary. The genotype samples of the old header are carried over.
 *
 * @param oldHeader     the header to update
 * @param referenceFile the file path to the reference sequence used to generate this vcf
 * @param refDict       the SAM formatted reference sequence dictionary
 * @return a new header made of the updated lines and {@code oldHeader}'s genotype samples
 */
public static VCFHeader withUpdatedContigs(final VCFHeader oldHeader, final File referenceFile, final SAMSequenceDictionary refDict) {
    final Set<VCFHeaderLine> updatedLines =
            withUpdatedContigsAsLines(oldHeader.getMetaDataInInputOrder(), referenceFile, refDict);
    return new VCFHeader(updatedLines, oldHeader.getGenotypeSamples());
}
public void initialize() { // take care of the VCF headers final Map<String, VCFHeader> vcfRods = GATKVCFUtils.getVCFHeadersFromRods(getToolkit()); final Set<VCFHeaderLine> headerLines = VCFUtils.smartMergeHeaders(vcfRods.values(), true); headerLines.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.DEPTH_KEY)); // needed for gVCFs without DP tags final Set<String> samples = SampleUtils.getSampleList(vcfRods, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE); final VCFHeader vcfHeader = new VCFHeader(headerLines, samples); vcfWriter.writeHeader(vcfHeader); // collect the actual rod bindings into a list for use later for ( final RodBindingCollection<VariantContext> variantCollection : variantCollections ) variants.addAll(variantCollection.getRodBindings()); genomeLocParser = getToolkit().getGenomeLocParser(); // create the annotation engine annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationGroupsToUse), annotationsToUse, Collections.<String>emptyList(), this, getToolkit()); //now that we have all the VCF headers, initialize the annotations (this is particularly important to turn off RankSumTest dithering in integration tests) annotationEngine.invokeAnnotationInitializationMethods(headerLines); // optimization to prevent mods when we always just want to break bands if ( multipleAtWhichToBreakBands == 1 ) USE_BP_RESOLUTION = true; }
/**
 * Returns a copy of the given header in which the contig header lines have been added / replaced
 * from the reference file and the master reference dictionary. Genotype samples are taken from
 * the old header unchanged.
 *
 * @param oldHeader     the header to update
 * @param referenceFile the file path to the reference sequence used to generate this vcf
 * @param refDict       the SAM formatted reference sequence dictionary
 * @return a new header built from the updated lines and the old header's genotype samples
 */
public static VCFHeader withUpdatedContigs(final VCFHeader oldHeader, final File referenceFile, final SAMSequenceDictionary refDict) {
    final Set<VCFHeaderLine> linesWithNewContigs =
            withUpdatedContigsAsLines(oldHeader.getMetaDataInInputOrder(), referenceFile, refDict);
    return new VCFHeader(linesWithNewContigs, oldHeader.getGenotypeSamples());
}
// Merge the header lines of every header held in vcfRods, then add a "source" header line whose
// value is "SelectVariants".
// NOTE(review): the meaning of the second argument (true) is not visible here -- confirm against smartMergeHeaders.
Set<VCFHeaderLine> headerLines = VCFUtils.smartMergeHeaders(vcfRods.values(), true); headerLines.add(new VCFHeaderLine("source", "SelectVariants"));