private EnumMap<GenotypeType,Double> getLikelihoodsAsMapSafeNull(Genotype genotype){ if(genotype == null || !genotype.isCalled() || genotype.getLikelihoods() == null){ EnumMap<GenotypeType,Double> likelihoods = new EnumMap<GenotypeType, Double>(GenotypeType.class); likelihoods.put(GenotypeType.HOM_REF,1.0/3.0); likelihoods.put(GenotypeType.HET,1.0/3.0); likelihoods.put(GenotypeType.HOM_VAR,1.0/3.0); return likelihoods; } return genotype.getLikelihoods().getAsMap(true); }
public static void assertEquals(final Genotype actual, final Genotype expected) { Assert.assertEquals(actual.getSampleName(), expected.getSampleName(), "Genotype names"); Assert.assertEquals(actual.getAlleles(), expected.getAlleles(), "Genotype alleles"); Assert.assertEquals(actual.getGenotypeString(), expected.getGenotypeString(), "Genotype string"); Assert.assertEquals(actual.getType(), expected.getType(), "Genotype type"); // filters are the same Assert.assertEquals(actual.getFilters(), expected.getFilters(), "Genotype fields"); Assert.assertEquals(actual.isFiltered(), expected.isFiltered(), "Genotype isFiltered"); // inline attributes Assert.assertEquals(actual.getDP(), expected.getDP(), "Genotype dp"); Assert.assertTrue(Arrays.equals(actual.getAD(), expected.getAD())); Assert.assertEquals(actual.getGQ(), expected.getGQ(), "Genotype gq"); Assert.assertEquals(actual.hasPL(), expected.hasPL(), "Genotype hasPL"); Assert.assertEquals(actual.hasAD(), expected.hasAD(), "Genotype hasAD"); Assert.assertEquals(actual.hasGQ(), expected.hasGQ(), "Genotype hasGQ"); Assert.assertEquals(actual.hasDP(), expected.hasDP(), "Genotype hasDP"); Assert.assertEquals(actual.hasLikelihoods(), expected.hasLikelihoods(), "Genotype haslikelihoods"); Assert.assertEquals(actual.getLikelihoodsString(), expected.getLikelihoodsString(), "Genotype getlikelihoodsString"); Assert.assertEquals(actual.getLikelihoods(), expected.getLikelihoods(), "Genotype getLikelihoods"); Assert.assertTrue(Arrays.equals(actual.getPL(), expected.getPL())); Assert.assertEquals(actual.getGQ(), expected.getGQ(), "Genotype phredScaledQual"); assertAttributesEquals(actual.getExtendedAttributes(), expected.getExtendedAttributes()); Assert.assertEquals(actual.isPhased(), expected.isPhased(), "Genotype isPhased"); Assert.assertEquals(actual.getPloidy(), expected.getPloidy(), "Genotype getPloidy"); }
/** * Convenience function that returns a string representation of the PL field of this * genotype, or . if none is available. * * @return a non-null String representation for the PL of this sample */ public String getLikelihoodsString() { return hasLikelihoods() ? getLikelihoods().toString() : VCFConstants.MISSING_VALUE_v4; }
public static void assertGenotypesAreEqual(final Genotype actual, final Genotype expected) { Assert.assertEquals(actual.getSampleName(), expected.getSampleName(), "Genotype names"); Assert.assertEquals(actual.getAlleles(), expected.getAlleles(), "Genotype alleles"); Assert.assertEquals(actual.getGenotypeString(), expected.getGenotypeString(), "Genotype string"); Assert.assertEquals(actual.getType(), expected.getType(), "Genotype type"); // filters are the same Assert.assertEquals(actual.getFilters(), expected.getFilters(), "Genotype fields"); Assert.assertEquals(actual.isFiltered(), expected.isFiltered(), "Genotype isFiltered"); // inline attributes Assert.assertEquals(actual.getDP(), expected.getDP(), "Genotype dp"); Assert.assertTrue(Arrays.equals(actual.getAD(), expected.getAD())); Assert.assertEquals(actual.getGQ(), expected.getGQ(), "Genotype gq"); Assert.assertEquals(actual.hasPL(), expected.hasPL(), "Genotype hasPL"); Assert.assertEquals(actual.hasAD(), expected.hasAD(), "Genotype hasAD"); Assert.assertEquals(actual.hasGQ(), expected.hasGQ(), "Genotype hasGQ"); Assert.assertEquals(actual.hasDP(), expected.hasDP(), "Genotype hasDP"); Assert.assertEquals(actual.hasLikelihoods(), expected.hasLikelihoods(), "Genotype haslikelihoods"); Assert.assertEquals(actual.getLikelihoodsString(), expected.getLikelihoodsString(), "Genotype getlikelihoodsString"); Assert.assertEquals(actual.getLikelihoods(), expected.getLikelihoods(), "Genotype getLikelihoods"); Assert.assertTrue(Arrays.equals(actual.getPL(), expected.getPL())); Assert.assertEquals(actual.getPhredScaledQual(), expected.getPhredScaledQual(), "Genotype phredScaledQual"); assertAttributesEquals(actual.getExtendedAttributes(), expected.getExtendedAttributes()); Assert.assertEquals(actual.isPhased(), expected.isPhased(), "Genotype isPhased"); Assert.assertEquals(actual.getPloidy(), expected.getPloidy(), "Genotype getPloidy"); }
/** * Convenience function that returns a string representation of the PL field of this * genotype, or . if none is available. * * @return a non-null String representation for the PL of this sample */ public String getLikelihoodsString() { return hasLikelihoods() ? getLikelihoods().toString() : VCFConstants.MISSING_VALUE_v4; }
Assert.assertEquals(actual.getLikelihoods(), expected.getLikelihoods(), "Genotype getLikelihoods"); Assert.assertTrue(Arrays.equals(actual.getPL(), expected.getPL()));
private double log10PLFromSamples(final VariantContext vc, final String sample, boolean calcRefP) { Genotype g = vc.getGenotype(sample); double log10pSample = -1000; if ( ! g.isNoCall() ) { final double[] gLikelihoods = MathUtils.normalizeFromLog10(g.getLikelihoods().getAsVector()); log10pSample = Math.log10(calcRefP ? gLikelihoods[0] : 1 - gLikelihoods[0]); log10pSample = Double.isInfinite(log10pSample) ? -10000 : log10pSample; } return log10pSample; }
private double calculateNChildren( final VariantContext vc, final Set<Sample> triosToTest, final int childIdx, final int momIdx, final int dadIdx ) { final double likelihoodVector[] = new double[triosToTest.size()]; int iii = 0; for( final Sample child : triosToTest ) { final double[] momGL = vc.getGenotype(child.getMaternalID()).getLikelihoods().getAsVector(); final double[] dadGL = vc.getGenotype(child.getPaternalID()).getLikelihoods().getAsVector(); final double[] childGL = vc.getGenotype(child.getID()).getLikelihoods().getAsVector(); likelihoodVector[iii++] = momGL[momIdx] + dadGL[dadIdx] + childGL[childIdx]; } return MathUtils.sumLog10(likelihoodVector); }
/** * Convenience function that returns a string representation of the PL field of this * genotype, or . if none is available. * * @return a non-null String representation for the PL of this sample */ @Ensures("result != null") public String getLikelihoodsString() { return hasLikelihoods() ? getLikelihoods().toString() : VCFConstants.MISSING_VALUE_v4; }
/** * Unpack GenotypesContext into arraylist of double values * @param GLs Input genotype context * @param keepUninformative Don't filter out uninformative genotype likelihoods (i.e. all log likelihoods near 0) * This is useful for VariantContexts with a NON_REF allele * @return ArrayList of doubles corresponding to GL vectors */ protected static ArrayList<double[]> getGLs(final GenotypesContext GLs, final boolean includeDummy, final boolean keepUninformative) { final ArrayList<double[]> genotypeLikelihoods = new ArrayList<>(GLs.size() + 1); if ( includeDummy ) genotypeLikelihoods.add(new double[]{0.0,0.0,0.0}); // dummy for ( Genotype sample : GLs.iterateInSampleNameOrder() ) { if ( sample.hasLikelihoods() ) { final double[] gls = sample.getLikelihoods().getAsVector(); if ( MathUtils.sum(gls) < GATKVariantContextUtils.SUM_GL_THRESH_NOCALL || keepUninformative ) genotypeLikelihoods.add(gls); } } return genotypeLikelihoods; }
/** * From a given genotype, extract a given subset of alleles and return the new PLs * @param g genotype to subset * @param allelesToUse alleles to subset * @param vc variant context with alleles and genotypes * @param ploidy number of chromosomes * @return the subsetted PLs */ private double[] subsetLikelihoodAlleles(final Genotype g, final List<Allele> allelesToUse, final VariantContext vc, final int ploidy){ // we need to determine which of the alternate alleles (and hence the likelihoods) to use and carry forward final int numOriginalAltAlleles = vc.getAlternateAlleles().size(); final int numNewAltAlleles = allelesToUse.size() - 1; // create the new likelihoods array from the alleles we are allowed to use final double[] originalLikelihoods = g.getLikelihoods().getAsVector(); if ( numOriginalAltAlleles != numNewAltAlleles ) { // might need to re-normalize the new likelihoods return MathUtils.normalizeFromLog10(GeneralPloidyGenotypeLikelihoods.subsetToAlleles(originalLikelihoods, ploidy, vc.getAlleles(), allelesToUse), false, true); } else return originalLikelihoods; }
else if(genotype == null || !hasCalledGT(genotype.getType()) || genotype.getLikelihoods() == null){ likelihoods = new double[NUM_CALLED_GENOTYPETYPES]; likelihoods[0] = LOG10_OF_ONE_THIRD; likelihoods = GeneralUtils.normalizeFromLog10(genotype.getLikelihoods().getAsVector(),true,true);
@Test(enabled = true, dataProvider = "Models") public void testNoPrior(final AFCalculator model) { for ( int REF_PL = 10; REF_PL <= 20; REF_PL += 10 ) { final Genotype AB = makePL(Arrays.asList(A,C), REF_PL, 0, 10000); final double[] flatPriors = new double[]{0.0,0.0,0.0}; // test that function computeAlleleFrequency correctly operates when the flat prior option is set // computeAlleleFrequencyPriors takes linear priors final ArrayList<Double> inputPrior = new ArrayList<Double>(); inputPrior.add(1.0/3); inputPrior.add(1.0/3); final AFPriorProvider log10priorProvider = UnifiedGenotypingEngine.composeAlleleFrequencyPriorProvider(2, 0.0, inputPrior); final double[] noPriors = log10priorProvider.forTotalPloidy(2); GetGLsTest cfgFlatPrior = new GetGLsTest(model, 1, Arrays.asList(AB), flatPriors, "flatPrior"); GetGLsTest cfgNoPrior = new GetGLsTest(model, 1, Arrays.asList(AB), noPriors, "noPrior"); final AFCalculationResult resultTrackerFlat = cfgFlatPrior.execute(); final AFCalculationResult resultTrackerNoPrior = cfgNoPrior.execute(); final double pRefWithNoPrior = AB.getLikelihoods().getAsVector()[0]; final double pHetWithNoPrior = AB.getLikelihoods().getAsVector()[1] - Math.log10(0.5); final double nonRefPost = Math.pow(10, pHetWithNoPrior) / (Math.pow(10, pRefWithNoPrior) + Math.pow(10, pHetWithNoPrior)); final double log10NonRefPost = Math.log10(nonRefPost); if ( ! Double.isInfinite(log10NonRefPost) ) { // check that the no-prior and flat-prior constructions yield same result Assert.assertEquals(resultTrackerFlat.getLog10PosteriorOfAFGT0(), resultTrackerNoPrior.getLog10PosteriorOfAFGT0()); } } }
private boolean checkGQIsGood(Genotype genotype) { if ( genotype.hasGQ() ) { return genotype.getGQ() >= minGenotypeQuality; } else if ( genotype.hasLikelihoods() ) { double log10gq = GenotypeLikelihoods.getGQLog10FromLikelihoods(genotype.getType().ordinal()-1,genotype.getLikelihoods().getAsVector()); return QualityUtils.phredScaleLog10ErrorRate(log10gq) >= minGenotypeQuality; } return minGenotypeQuality <= 0; }
@Test(enabled = true && !DEBUG_ONLY, dataProvider = "Models") public void testBiallelicPriors(final AFCalculator model) { for ( int REF_PL = 10; REF_PL <= 20; REF_PL += 10 ) { final Genotype AB = makePL(Arrays.asList(A,C), REF_PL, 0, 10000); for ( int log10NonRefPrior = 1; log10NonRefPrior < 10*REF_PL; log10NonRefPrior += 1 ) { final double refPrior = 1 - QualityUtils.qualToErrorProb(log10NonRefPrior); final double nonRefPrior = (1-refPrior) / 2; final double[] priors = MathUtils.normalizeFromLog10(MathUtils.toLog10(new double[]{refPrior, nonRefPrior, nonRefPrior}), true); if ( ! Double.isInfinite(priors[1]) ) { GetGLsTest cfg = new GetGLsTest(model, 1, Arrays.asList(AB), priors, "pNonRef" + log10NonRefPrior); final AFCalculationResult resultTracker = cfg.execute(); final int actualAC = resultTracker.getAlleleCountsOfMLE()[0]; final double pRefWithPrior = AB.getLikelihoods().getAsVector()[0] + priors[0]; final double pHetWithPrior = AB.getLikelihoods().getAsVector()[1] + priors[1] - Math.log10(0.5); final double nonRefPost = Math.pow(10, pHetWithPrior) / (Math.pow(10, pRefWithPrior) + Math.pow(10, pHetWithPrior)); final double log10NonRefPost = Math.log10(nonRefPost); if ( ! Double.isInfinite(log10NonRefPost) ) Assert.assertEquals(resultTracker.getLog10PosteriorOfAFGT0(), log10NonRefPost, 1e-2); if ( nonRefPost >= 0.9 ) Assert.assertTrue(resultTracker.isPolymorphic(C, -1)); final int expectedMLEAC = 1; // the MLE is independent of the prior Assert.assertEquals(actualAC, expectedMLEAC, "actual AC with priors " + log10NonRefPrior + " not expected " + expectedMLEAC + " priors " + Utils.join(",", priors)); } } } }
private static double[] log10NormalizedGenotypePosteriors(final Genotype g, final GenotypeLikelihoodCalculator glCalc, final double[] log10AlleleFrequencies) { final double[] log10Likelihoods = g.getLikelihoods().getAsVector(); final double[] log10Posteriors = new IndexRange(0, glCalc.genotypeCount()).mapToDouble(genotypeIndex -> { final GenotypeAlleleCounts gac = glCalc.genotypeAlleleCountsAt(genotypeIndex); return gac.log10CombinationCount() + log10Likelihoods[genotypeIndex] + gac.sumOverAlleleIndicesAndCounts((index, count) -> count * log10AlleleFrequencies[index]); }); return MathUtils.normalizeFromLog10(log10Posteriors, true); }
private void assertGenotypesAreMostlyEqual(GenotypesContext actual, GenotypesContext expected) { if (actual == expected) { return; } if (actual == null || expected == null) { Assert.fail("Maps not equal: expected: " + expected + " and actual: " + actual); } if (actual.size() != expected.size()) { Assert.fail("Maps do not have the same size:" + actual.size() + " != " + expected.size()); } for (Genotype value : actual) { Genotype expectedValue = expected.get(value.getSampleName()); Assert.assertEquals(value.getAlleles(), expectedValue.getAlleles(), "Alleles in Genotype aren't equal"); Assert.assertEquals(value.getGQ(), expectedValue.getGQ(), "GQ values aren't equal"); Assert.assertEquals(value.hasLikelihoods(), expectedValue.hasLikelihoods(), "Either both have likelihoods or both not"); if ( value.hasLikelihoods() ) Assert.assertEquals(value.getLikelihoods().getAsVector(), expectedValue.getLikelihoods().getAsVector(), "Genotype likelihoods aren't equal"); } }
@Override protected void reduceScopeCalculateLikelihoodSums(final VariantContext vc, final int defaultPloidy, final LikelihoodSum[] likelihoodSums) { final int numOriginalAltAlleles = likelihoodSums.length; final GenotypesContext genotypes = vc.getGenotypes(); for ( final Genotype genotype : genotypes.iterateInSampleNameOrder() ) { if (!genotype.hasPL()) continue; final double[] gls = genotype.getLikelihoods().getAsVector(); if (MathUtils.sum(gls) >= GATKVariantContextUtils.SUM_GL_THRESH_NOCALL) continue; final int PLindexOfBestGL = MathUtils.maxElementIndex(gls); final double bestToHomRefDiffGL = PLindexOfBestGL == PL_INDEX_OF_HOM_REF ? 0.0 : gls[PLindexOfBestGL] - gls[PL_INDEX_OF_HOM_REF]; final int declaredPloidy = genotype.getPloidy(); final int ploidy = declaredPloidy <= 0 ? defaultPloidy : declaredPloidy; final int[] acCount = GeneralPloidyGenotypeLikelihoods.getAlleleCountFromPLIndex(1 + numOriginalAltAlleles, ploidy, PLindexOfBestGL); // by convention, first count coming from getAlleleCountFromPLIndex comes from reference allele for (int k=1; k < acCount.length;k++) if (acCount[k] > 0 ) likelihoodSums[k-1].sum += acCount[k] * bestToHomRefDiffGL; } }
double[] motherLikelihoods = motherIsCalled? GeneralUtils.normalizeFromLog10(motherGenotype.getLikelihoods().getAsVector()) : uninformativeLikelihoods; double[] fatherLikelihoods = fatherIsCalled? GeneralUtils.normalizeFromLog10(fatherGenotype.getLikelihoods().getAsVector()) : uninformativeLikelihoods; double[] childLikelihoods = childIsCalled? GeneralUtils.normalizeFromLog10(childGenotype.getLikelihoods().getAsVector()) : uninformativeLikelihoods;
@Override @Requires("vc != null && likelihoodSums != null") protected void reduceScopeCalculateLikelihoodSums(final VariantContext vc, final int defaultPloidy, final LikelihoodSum[] likelihoodSums) { final int numOriginalAltAlleles = likelihoodSums.length; final GenotypesContext genotypes = vc.getGenotypes(); for ( final Genotype genotype : genotypes.iterateInSampleNameOrder() ) { if (!genotype.hasPL()) continue; final double[] gls = genotype.getLikelihoods().getAsVector(); if (MathUtils.sum(gls) >= GATKVariantContextUtils.SUM_GL_THRESH_NOCALL) continue; final int PLindexOfBestGL = MathUtils.maxElementIndex(gls); final double bestToHomRefDiffGL = PLindexOfBestGL == PL_INDEX_OF_HOM_REF ? 0.0 : gls[PLindexOfBestGL] - gls[PL_INDEX_OF_HOM_REF]; final int declaredPloidy = genotype.getPloidy(); final int ploidy = declaredPloidy <= 0 ? defaultPloidy : declaredPloidy; final int[] acCount = GeneralPloidyGenotypeLikelihoods.getAlleleCountFromPLIndex(1 + numOriginalAltAlleles, ploidy, PLindexOfBestGL); // by convention, first count coming from getAlleleCountFromPLIndex comes from reference allele for (int k=1; k < acCount.length;k++) if (acCount[k] > 0 ) likelihoodSums[k-1].sum += acCount[k] * bestToHomRefDiffGL; } }