private List<String> remapFileData(Variant variant, StudyEntry studyEntry, FileEntry fileEntry, VariantOverlappingStatus overlappingStatus) { int capacity = fileAttributes.size() + HBaseToStudyEntryConverter.FILE_INFO_START_IDX; List<String> fileColumn = Arrays.asList(new String[capacity]); Map<String, String> attributes = fileEntry.getAttributes(); fileColumn.set(HBaseToStudyEntryConverter.FILE_CALL_IDX, fileEntry.getCall()); if (addSecondaryAlternates && studyEntry.getSecondaryAlternates() != null && !studyEntry.getSecondaryAlternates().isEmpty()) { fileColumn.set(HBaseToStudyEntryConverter.FILE_SEC_ALTS_IDX, getSecondaryAlternates(variant, studyEntry)); } fileColumn.set(HBaseToStudyEntryConverter.FILE_VARIANT_OVERLAPPING_STATUS_IDX, overlappingStatus.toString()); fileColumn.set(HBaseToStudyEntryConverter.FILE_QUAL_IDX, attributes.get(StudyEntry.QUAL)); fileColumn.set(HBaseToStudyEntryConverter.FILE_FILTER_IDX, attributes.get(StudyEntry.FILTER)); int attributeIdx = HBaseToStudyEntryConverter.FILE_INFO_START_IDX; for (String fileAttribute : fileAttributes) { fileColumn.set(attributeIdx, attributes.get(fileAttribute)); attributeIdx++; } // Trim all leading null values fileColumn = trimLeadingNullValues(fileColumn, HBaseToStudyEntryConverter.FILE_INFO_START_IDX); return fileColumn; }
List<AlternateCoordinate> currAlts = new ArrayList<>(); currAlts.add(getMainAlternate(current)); currAlts.addAll(currentStudy.getSecondaryAlternates()); for (String dupSample : duplicateSamples) { String currGt = getStudy(current).getSampleData(dupSample, getGtKey());
/** * Creates a list with the provided Variant and all secondary alternates {@link AlternateCoordinate} converted to * Variants. * @param v {@link Variant} * @return List of Variant positions. */ public static List<Alternate> expandToVariants(Variant v) { Alternate mainVariant = new Alternate(asVariant(v)); if (v.getStudies().isEmpty()) { return Collections.singletonList(mainVariant); } List<AlternateCoordinate> secondaryAlternates = v.getStudies().get(0).getSecondaryAlternates(); if (secondaryAlternates.isEmpty()) { return Collections.singletonList(mainVariant); } // Check AltCoords as well List<Alternate> list = new ArrayList<>(1 + secondaryAlternates.size()); list.add(mainVariant); secondaryAlternates.forEach(alt -> list.add(new Alternate(asVariant(v, alt)))); return list; }
/** * Build a list of all the alternates from a variant. Includes the main and the secondary alternates. * @param variant * @return */ public List<AlternateCoordinate> buildAltList(Variant variant) { AlternateCoordinate mainAlternate = getMainAlternate(variant); List<AlternateCoordinate> alternates = new ArrayList<>(); boolean emptyRefBlock = mainAlternate.getType().equals(VariantType.NO_VARIATION) && (mainAlternate.getAlternate().isEmpty() || mainAlternate.getAlternate().equals(Allele.NO_CALL_STRING)); // Skip Reference Blocks (NO_VARIATION) where the alternate is empty if (!emptyRefBlock) { alternates.add(mainAlternate); } StudyEntry se = getStudy(variant); if(se.getSecondaryAlternates() != null){ se.getSecondaryAlternates().forEach( alt -> alternates.add(copyAlt(variant, alt))); } return alternates; }
public static boolean hasAnyOverlap(Variant current, Variant other) { if (current.overlapWith(other, true)) { return true; } // SecAlt of query return other.getStudies().stream() .filter( s -> // foreach study s.getSecondaryAlternates().stream() .filter(a -> { // Avoid NPE a = copyAlt(other, a); return current.overlapWith(a.getChromosome(), a.getStart(), a.getEnd(), true); } ) .findAny() .isPresent() ) .findAny() .isPresent(); }
/** * @param variant remains unchanged if the VariantSourceEntry is not inside * @param study stats are written here */ public void calculate(Variant variant, StudyEntry study) { // Map<String, String> infoMap = VariantAggregatedVcfFactory.getInfoMap(info); if (study.getFiles().isEmpty()) { return; } FileEntry fileEntry = study.getFiles().get(0); Map<String, String> infoMap = fileEntry.getAttributes(); int numAllele = 0; String reference = variant.getReference(); String[] alternateAlleles; if (study.getSecondaryAlternates().isEmpty()) { alternateAlleles = new String[]{variant.getAlternate()}; } else { List<String> secondaryAlternates = study.getSecondaryAlternates().stream().map(AlternateCoordinate::getAlternate).collect(Collectors.toList()); secondaryAlternates.add(0, variant.getAlternate()); alternateAlleles = secondaryAlternates.toArray(new String[secondaryAlternates.size()]); } if (tagMap != null) { parseMappedStats(variant, study, numAllele, reference, alternateAlleles, infoMap); } else { parseStats(variant, study, numAllele, reference, alternateAlleles, infoMap); } }
/**
 * Adjust start/end if a reference base is required due to an empty allele. All variants are checked due to SecAlts.
 * <p>
 * The range starts as the variant's own start/end, moved one base to the left when the reference or the
 * alternate allele is blank. It is then widened to cover every secondary alternate of the configured study,
 * again moving one base to the left of a secondary alternate that has a blank allele.
 * <p>
 * A secondary alternate may leave start, end, reference or alternate unset ({@code null}); each missing field
 * falls back to the corresponding value of the main variant instead of failing on unboxing.
 *
 * @param variant {@link Variant} object.
 * @return {@code Pair<Integer, Integer>} The adjusted (or same) start/end position e.g. SV and MNV as SecAlt,
 *         INDEL, etc.
 */
protected Pair<Integer, Integer> adjustedVariantStart(Variant variant) {
    Integer start = variant.getStart();
    Integer end = variant.getEnd();
    if (StringUtils.isBlank(variant.getReference()) || StringUtils.isBlank(variant.getAlternate())) {
        start = start - 1;
    }
    for (AlternateCoordinate alternateCoordinate
            : variant.getStudy(this.studyConfiguration.getStudyName()).getSecondaryAlternates()) {
        // Unset fields of a secondary alternate mean "same as the main variant".
        Integer altStart = alternateCoordinate.getStart() == null
                ? variant.getStart() : alternateCoordinate.getStart();
        Integer altEnd = alternateCoordinate.getEnd() == null
                ? variant.getEnd() : alternateCoordinate.getEnd();
        String altReference = alternateCoordinate.getReference() == null
                ? variant.getReference() : alternateCoordinate.getReference();
        String altAlternate = alternateCoordinate.getAlternate() == null
                ? variant.getAlternate() : alternateCoordinate.getAlternate();

        start = Math.min(start, altStart);
        end = Math.max(end, altEnd);
        if (StringUtils.isBlank(altAlternate) || StringUtils.isBlank(altReference)) {
            // An empty allele needs the preceding reference base.
            start = Math.min(start, altStart - 1);
        }
    }
    return new ImmutablePair<>(start, end);
}
/**
 * Serialises the secondary alternates of a study into one string.
 * <p>
 * Each alternate is written as chromosome, start, end, reference, alternate and type joined by
 * {@code ALTERNATE_COORDINATE_SEPARATOR}; alternates are separated by {@code ','}. Any field that is
 * {@code null} in the alternate is replaced by the matching field of {@code variant}.
 *
 * @param variant    main variant providing the fallback values
 * @param studyEntry study whose secondary alternates are written
 * @return the encoded alternates, or an empty string when there are none
 */
private String getSecondaryAlternates(Variant variant, StudyEntry studyEntry) {
    StringBuilder encoded = new StringBuilder();
    boolean first = true;
    for (AlternateCoordinate alt : studyEntry.getSecondaryAlternates()) {
        if (!first) {
            encoded.append(',');
        }
        first = false;
        encoded.append(alt.getChromosome() == null ? variant.getChromosome() : alt.getChromosome())
                .append(ALTERNATE_COORDINATE_SEPARATOR)
                .append(alt.getStart() == null ? variant.getStart() : alt.getStart())
                .append(ALTERNATE_COORDINATE_SEPARATOR)
                .append(alt.getEnd() == null ? variant.getEnd() : alt.getEnd())
                .append(ALTERNATE_COORDINATE_SEPARATOR)
                .append(alt.getReference() == null ? variant.getReference() : alt.getReference())
                .append(ALTERNATE_COORDINATE_SEPARATOR)
                .append(alt.getAlternate() == null ? variant.getAlternate() : alt.getAlternate())
                .append(ALTERNATE_COORDINATE_SEPARATOR)
                .append(alt.getType() == null ? variant.getType() : alt.getType());
    }
    return encoded.toString();
}
/**
 * Builds the allele strings of a variant for the given adjusted range: reference first, then the main
 * alternate, then one entry per secondary alternate of the configured study.
 * <p>
 * A secondary alternate may leave its start or end unset ({@code null}); in that case the start/end of the main
 * variant is used, so that no {@code null} coordinate is handed to {@code buildAllele}.
 *
 * @param variant       variant to read the alleles from
 * @param adjustedRange adjusted start/end passed through to {@code buildAllele}
 * @return new list with {@code 2 + number of secondary alternates} alleles
 */
public List<String> buildAlleles(Variant variant, Pair<Integer, Integer> adjustedRange) {
    String reference = variant.getReference();
    String alternate = variant.getAlternate();
    List<AlternateCoordinate> secAlts = variant.getStudy(this.studyConfiguration.getStudyName()).getSecondaryAlternates();
    List<String> alleles = new ArrayList<>(secAlts.size() + 2);
    Integer origStart = variant.getStart();
    Integer origEnd = variant.getEnd();
    alleles.add(buildAllele(variant.getChromosome(), origStart, origEnd, reference, adjustedRange));
    alleles.add(buildAllele(variant.getChromosome(), origStart, origEnd, alternate, adjustedRange));
    for (AlternateCoordinate alt : secAlts) {
        // Unset coordinates of a secondary alternate mean "same as the main variant".
        Integer altStart = alt.getStart() == null ? origStart : alt.getStart();
        Integer altEnd = alt.getEnd() == null ? origEnd : alt.getEnd();
        alleles.add(buildAllele(variant.getChromosome(), altStart, altEnd, alt.getAlternate(), adjustedRange));
    }
    return alleles;
}
/**
 * An insertion and a deletion at the same position, each listing the other as secondary alternate, must be
 * resolved to a single variant that keeps one secondary alternate and the genotype {@code 1/2}.
 */
@Test
public void resolveSameVariantWithSecAltInsertion() throws Exception {
    Variant insertion = getVariant("2:10048155:-:AT", "PASS", "220", "1/2");
    Variant deletion = getVariant("2:10048155:ATATATATATAT:-", "PASS", "220", "2/1");

    // Cross-reference both calls: each one carries the other as its secondary alternate.
    AlternateCoordinate deletionAsSecAlt = new AlternateCoordinate(
            "2", deletion.getStart(), deletion.getEnd(), deletion.getReference(), deletion.getAlternate(), INDEL);
    AlternateCoordinate insertionAsSecAlt = new AlternateCoordinate(
            "2", insertion.getStart(), insertion.getEnd(), insertion.getReference(), insertion.getAlternate(), INDEL);
    insertion.getStudies().get(0).getSecondaryAlternates().add(deletionAsSecAlt);
    deletion.getStudies().get(0).getSecondaryAlternates().add(insertionAsSecAlt);

    List<Variant> resolved = new ArrayList<>(
            new VariantLocalConflictResolver().resolveConflicts(Arrays.asList(insertion, deletion)));

    System.out.println("a.toString() = " + insertion.toString());
    System.out.println("b.getStudies().get(0).getSecondaryAlternates().get(0).toString() = "
            + deletion.getStudies().get(0).getSecondaryAlternates().get(0).toString());

    assertEquals(1, resolved.size());
    Variant survivor = resolved.get(0);
    assertEquals(1, survivor.getStudies().get(0).getSecondaryAlternates().size());
    assertEquals("1/2", survivor.getStudies().get(0).getSamplesData().get(0).get(0));
}
@Override public List<String> buildAlleles(Variant variant, Pair<Integer, Integer> adjustedRange, Map<Integer, Character> referenceAlleles) { String reference = variant.getReference(); String alternate = variant.getAlternate(); if (variant.getSv() != null && variant.getSv().getType() == StructuralVariantType.TANDEM_DUPLICATION && alternate.equals(VariantBuilder.DUP_ALT)) { alternate = VariantBuilder.DUP_TANDEM_ALT; } if (variant.getType().equals(VariantType.NO_VARIATION)) { return Arrays.asList(reference, "."); } StudyEntry study = getStudy(variant); List<AlternateCoordinate> secAlts = study.getSecondaryAlternates(); List<String> alleles = new ArrayList<>(secAlts.size() + 2); int origStart = variant.getStart(); int origEnd; if (variant.getLength() == Variant.UNKNOWN_LENGTH) { // Variant::getLengthReference would return UNKNOWN_LENGTH, as the reference could have incomplete reference length origEnd = variant.getStart() + variant.getReference().length() - 1; } else { origEnd = variant.getEnd(); } alleles.add(buildAllele(variant.getChromosome(), origStart, origEnd, reference, adjustedRange, referenceAlleles)); alleles.add(buildAllele(variant.getChromosome(), origStart, origEnd, alternate, adjustedRange, referenceAlleles)); secAlts.forEach(alt -> { int alternateStart = alt.getStart() == null ? variant.getStart() : alt.getStart().intValue(); int alternateEnd = alt.getEnd() == null ? variant.getEnd() : alt.getEnd().intValue(); alleles.add(buildAllele(variant.getChromosome(), alternateStart, alternateEnd, alt.getAlternate(), adjustedRange, referenceAlleles)); }); return alleles; }
/**
 * Checks the two alleles stored at {@code 1:10297}: each variant must have exactly one secondary alternate and
 * the genotypes of samples NA12877 and NA12878 must be numbered relative to the queried allele.
 *
 * @param dbAdaptor adaptor used to fetch the variants
 */
public void checkNewMultiAllelicVariants(VariantHadoopDBAdaptor dbAdaptor) {
    // Row i: variant id, expected GT of NA12877, expected GT of NA12878.
    String[] variantIds = {"1:10297:C:G", "1:10297:C:T"};
    String[] expectedNa12877 = {"0/1", "0/2"};
    String[] expectedNa12878 = {"0/2", "0/1"};

    for (int i = 0; i < variantIds.length; i++) {
        Variant v = dbAdaptor.get(
                new Query(VariantQueryParam.ID.key(), variantIds[i])
                        .append(VariantQueryParam.UNKNOWN_GENOTYPE.key(), "?"),
                null).first();
        assertEquals(1, v.getStudies().get(0).getSecondaryAlternates().size());
        assertEquals(expectedNa12877[i], v.getStudies().get(0).getSampleData("NA12877", "GT"));
        assertEquals(expectedNa12878[i], v.getStudies().get(0).getSampleData("NA12878", "GT"));
    }
}
/**
 * Checks two variants that must have no secondary alternates and a {@code ./.} genotype for the listed samples:
 * {@code 1:10821:T:A} (NA12878 and NA12880) and {@code 1:10635:C:G} (NA12880).
 *
 * @param dbAdaptor adaptor used to fetch the variants
 */
public void checkNewMissingPositions(VariantHadoopDBAdaptor dbAdaptor) {
    final String missingGt = "./.";

    Variant at10821 = dbAdaptor.get(
            new Query(VariantQueryParam.ID.key(), "1:10821:T:A")
                    .append(VariantQueryParam.UNKNOWN_GENOTYPE.key(), "?"),
            null).first();
    assertEquals(0, at10821.getStudies().get(0).getSecondaryAlternates().size());
    assertEquals(missingGt, at10821.getStudies().get(0).getSampleData("NA12878", "GT"));
    assertEquals(missingGt, at10821.getStudies().get(0).getSampleData("NA12880", "GT"));

    Variant at10635 = dbAdaptor.get(
            new Query(VariantQueryParam.ID.key(), "1:10635:C:G")
                    .append(VariantQueryParam.UNKNOWN_GENOTYPE.key(), "?"),
            null).first();
    assertEquals(0, at10635.getStudies().get(0).getSecondaryAlternates().size());
    assertEquals(missingGt, at10635.getStudies().get(0).getSampleData("NA12880", "GT"));
}
/**
 * Builds a test {@link Variant} from its string form, attached to a single study {@code "1"} with one file
 * {@code "1"} that has an empty call and no attributes.
 * <p>
 * When the alternate contains commas it is split: the first allele becomes the main alternate and every
 * following allele is added to the study as a secondary alternate that only has its alternate set (all other
 * fields are {@code null}).
 *
 * @param var variant string understood by the {@code Variant(String)} constructor
 * @return the new variant
 */
public static Variant getVariant(String var) {
    Variant v = new Variant(var);
    StudyEntry study = new StudyEntry("1", "1");
    study.setFiles(Collections.singletonList(new FileEntry("1", "", new HashMap<>())));
    v.setStudies(Collections.singletonList(study));
    if (v.getAlternate().contains(",")) {
        String[] alternates = v.getAlternate().split(",");
        v.setAlternate(alternates[0]);
        for (int i = 1; i < alternates.length; i++) {
            v.getStudies().get(0).getSecondaryAlternates()
                    .add(new AlternateCoordinate(null, null, null, null, alternates[i], null));
        }
    }
    return v;
}
/**
 * Two calls sharing position and reference, each listing the other as secondary alternate and coming from the
 * same original multi-allelic call, must be resolved to a single variant.
 */
@Test
public void resolveSameVariantWithSecAlt() throws Exception {
    Variant first = getVariant("2:10048155:TCTTTTTTTT:AC", "PASS", "220", "1/2");
    Variant second = getVariant("2:10048155:TCTTTTTTTT:-", "PASS", "220", "2/1");

    AlternateCoordinate secondAsSecAlt = new AlternateCoordinate(
            "2", second.getStart(), second.getEnd(), second.getReference(), second.getAlternate(), INDEL);
    first.getStudies().get(0).getSecondaryAlternates().add(secondAsSecAlt);
    first.getStudies().get(0).getFiles().get(0).setCall("10048155:TTCTTTTTTTT:TAC,T:0");

    AlternateCoordinate firstAsSecAlt = new AlternateCoordinate(
            "2", first.getStart(), first.getEnd(), first.getReference(), first.getAlternate(), INDEL);
    second.getStudies().get(0).getSecondaryAlternates().add(firstAsSecAlt);
    second.getStudies().get(0).getFiles().get(0).setCall("10048155:TTCTTTTTTTT:TAC,T:1");

    Collection<Variant> resolved =
            new VariantLocalConflictResolver().resolveConflicts(Arrays.asList(first, second));

    assertEquals(1, resolved.size());
}
/**
 * Loads the two {@code gaps2} genome VCFs, runs the shared conflicting-files checks and then verifies variant
 * {@code 1:10035:A:G}: its first secondary alternate is the {@code <*>} NO_VARIATION allele and the GT/AD
 * values of samples s1 and s2 match the expected ones.
 */
@Test
public void testFillGapsConflictingFilesNonRef() throws Exception {
    StudyConfiguration studyConfiguration = load(new QueryOptions(), Arrays.asList(
            getResourceUri("gaps2/file1.genome.vcf"),
            getResourceUri("gaps2/file2.genome.vcf")));

    checkConflictingFiles(studyConfiguration);

    VariantDBAdaptor dbAdaptor = variantStorageEngine.getDBAdaptor();
    Query byId = new Query(VariantQueryParam.ID.key(), "1:10035:A:G");
    Variant variantMulti = dbAdaptor.get(byId, null).first();

    assertEquals("0/0", variantMulti.getStudies().get(0).getSampleData("s1", "GT"));

    AlternateCoordinate expectedNonRef =
            new AlternateCoordinate("1", 10035, 10035, "A", "<*>", VariantType.NO_VARIATION);
    assertEquals(expectedNonRef, variantMulti.getStudies().get(0).getSecondaryAlternates().get(0));

    assertEquals("4,0,1", variantMulti.getStudies().get(0).getSampleData("s1", "AD"));
    assertEquals("0/1", variantMulti.getStudies().get(0).getSampleData("s2", "GT"));
    assertEquals("13,23,0", variantMulti.getStudies().get(0).getSampleData("s2", "AD"));
}
/**
 * Runs fill-gaps over every sample of the study and verifies the outcome on three variants:
 * {@code 1:10020:A:T} (gap), {@code 1:10012:TTT:-} (multi-allelic with a {@code <*>} secondary alternate) and
 * {@code 1:10054:A:G} (secondary alternates {@code C} and {@code T}).
 *
 * @param studyConfiguration configuration of the study under test
 * @throws Exception if any storage operation fails
 */
public void checkConflictingFiles(StudyConfiguration studyConfiguration) throws Exception {
    HadoopVariantStorageEngine hadoopEngine = (HadoopVariantStorageEngine) this.variantStorageEngine;
    VariantHadoopDBAdaptor dbAdaptor = hadoopEngine.getDBAdaptor();

    // Fill gaps for all samples, in ascending sample-id order.
    List<Integer> sampleIds = new ArrayList<>(studyConfiguration.getSampleIds().values());
    Collections.sort(sampleIds);
    fillGaps(hadoopEngine, studyConfiguration, sampleIds);

    printVariants(
            dbAdaptor.getStudyConfigurationManager()
                    .getStudyConfiguration(studyConfiguration.getStudyId(), null).first(),
            dbAdaptor, newOutputUri(1));
    checkFillGaps(studyConfiguration, dbAdaptor, sampleIds, Collections.singleton("1:10020:A:T"));
    checkSampleIndexTable(dbAdaptor);

    Variant variantGap = dbAdaptor.get(new Query(VariantQueryParam.ID.key(), "1:10020:A:T"), null).first();
    assertEquals("0/1", variantGap.getStudies().get(0).getSampleData("s1", "GT"));
    assertEquals(GenotypeClass.UNKNOWN_GENOTYPE, variantGap.getStudies().get(0).getSampleData("s2", "GT"));

    Variant variantMulti = dbAdaptor.get(new Query(VariantQueryParam.ID.key(), "1:10012:TTT:-"), null).first();
    assertEquals("<*>", variantMulti.getStudies().get(0).getSecondaryAlternates().get(0).getAlternate());
    assertEquals("0/1", variantMulti.getStudies().get(0).getSampleData("s1", "GT"));
    assertEquals("2/2", variantMulti.getStudies().get(0).getSampleData("s2", "GT"));

    Variant variantNonMulti = dbAdaptor.get(new Query(VariantQueryParam.ID.key(), "1:10054:A:G"), null).first();
    HashSet<String> expectedSecAlts = new HashSet<>(Arrays.asList("C", "T"));
    HashSet<String> actualSecAlts = new HashSet<>();
    for (AlternateCoordinate secAlt : variantNonMulti.getStudies().get(0).getSecondaryAlternates()) {
        actualSecAlts.add(secAlt.getAlternate());
    }
    assertEquals(expectedSecAlts, actualSecAlts);
    assertEquals("2/3", variantNonMulti.getStudies().get(0).getSampleData("s1", "GT"));
    assertEquals("0/1", variantNonMulti.getStudies().get(0).getSampleData("s2", "GT"));
}
/**
 * Adjust start/end if a reference base is required due to an empty allele. All variants are checked due to
 * SecAlts.
 * <p>
 * A {@code NO_VARIATION} variant is returned with its own start/end untouched. Otherwise the range is computed
 * for the main alleles and then the same pair is handed to the helper once per secondary alternate of the
 * study (presumably so the helper widens it in place; its return value is not used in the loop). Fields a
 * secondary alternate leaves {@code null} are taken from the main variant.
 *
 * @param variant          {@link Variant} object.
 * @param study            Study whose secondary alternates are taken into account
 * @param referenceAlleles reference bases by position, passed through to the helper
 * @return {@code Pair<Integer, Integer>} The adjusted (or same) start/end position e.g. SV and MNV as SecAlt,
 *         INDEL, etc.
 */
public static Pair<Integer, Integer> adjustedVariantStart(Variant variant, StudyEntry study,
                                                          Map<Integer, Character> referenceAlleles) {
    if (variant.getType().equals(VariantType.NO_VARIATION)) {
        return new ImmutablePair<>(variant.getStart(), variant.getEnd());
    }

    MutablePair<Integer, Integer> range = adjustedVariantStart(variant.getStart(), variant.getEnd(),
            variant.getReference(), variant.getAlternate(), referenceAlleles, null);

    for (AlternateCoordinate secAlt : study.getSecondaryAlternates()) {
        int secStart;
        if (secAlt.getStart() == null) {
            secStart = variant.getStart();
        } else {
            secStart = secAlt.getStart().intValue();
        }

        int secEnd;
        if (secAlt.getEnd() == null) {
            secEnd = variant.getEnd();
        } else {
            secEnd = secAlt.getEnd().intValue();
        }

        String secReference = secAlt.getReference();
        if (secReference == null) {
            secReference = variant.getReference();
        }

        String secAlternate = secAlt.getAlternate();
        if (secAlternate == null) {
            secAlternate = variant.getAlternate();
        }

        adjustedVariantStart(secStart, secEnd, secReference, secAlternate, referenceAlleles, range);
    }
    return range;
}
/**
 * An insertion at 100 carrying a second insertion (at 102) as secondary alternate, a call at 100 forced to
 * {@code NO_VARIATION}, and the insertion at 102 itself, all from the same original call, must be resolved to
 * two variants.
 */
@Test
public void resolve_INS_SNP_SEC_ALT() throws Exception {
    final String originalCall = "100:TT:GGTTGTT,TTAGGA:";

    Variant insertionAt100 = getVariant("1:100:-:GGTTG", "PASS", "390", "1/2");
    Variant refCall = getVariant("1:100:G:-", "PASS", "390", "0/0");
    Variant insertionAt102 = getVariant("1:102:-:AGGA", "PASS", "390", "0/1");
    refCall.setType(NO_VARIATION);

    AlternateCoordinate insertionAt102AsSecAlt = new AlternateCoordinate(
            insertionAt102.getChromosome(), insertionAt102.getStart(), insertionAt102.getEnd(),
            insertionAt102.getReference(), insertionAt102.getAlternate(), INDEL);
    insertionAt100.getStudies().get(0).getSecondaryAlternates().add(insertionAt102AsSecAlt);

    insertionAt100.getStudies().get(0).getFiles().get(0).setCall(originalCall + "0");
    refCall.getStudies().get(0).getFiles().get(0).setCall(originalCall + "0");
    insertionAt102.getStudies().get(0).getFiles().get(0).setCall(originalCall + "1");

    Collection<Variant> resolved = new VariantLocalConflictResolver()
            .resolveConflicts(Arrays.asList(insertionAt100, refCall, insertionAt102));

    assertEquals(2, resolved.size());
}
@Test public void resolve_DEL_DEL_SEC_ALT() throws Exception { Variant a = getVariant("1:100:-:GG", "PASS", "390", "1/2"); Variant b = getVariant("1:100:-:GG", "PASS", "390", "1/2"); Variant c = getVariant("1:100:-:GA", "PASS", "390", "1/2"); // mix with a Variant d = getVariant("1:100:-:GT", "PASS", "390", "1/2"); // mix with b a.getStudies().get(0).getSecondaryAlternates().add( new AlternateCoordinate(c.getChromosome(), c.getStart(), c.getEnd(), c.getReference(), c.getAlternate(), INDEL)); c.getStudies().get(0).getSecondaryAlternates().add( new AlternateCoordinate(a.getChromosome(), a.getStart(), a.getEnd(), a.getReference(), a.getAlternate(), INDEL)); b.getStudies().get(0).getSecondaryAlternates().add( new AlternateCoordinate(d.getChromosome(), d.getStart(), d.getEnd(), d.getReference(), d.getAlternate(), INDEL)); d.getStudies().get(0).getSecondaryAlternates().add( new AlternateCoordinate(b.getChromosome(), b.getStart(), b.getEnd(), b.getReference(), b.getAlternate(), INDEL)); Collection<Variant> resolved = new VariantLocalConflictResolver().resolveConflicts(Arrays.asList(a, b, c, d)); System.out.println("resolved = " + resolved); assertEquals(1, resolved.size()); }