/**
 * Returns the study entry of the given variant for the study this instance is bound to.
 * The key passed to {@code Variant#getStudy} is looked up in {@code studyNameMap}
 * using {@code studyIdString}.
 *
 * @param variant variant to read the study entry from
 * @return whatever {@code variant.getStudy(...)} returns for the mapped key
 */
@Override
protected StudyEntry getStudy(Variant variant) {
    String mappedStudy = studyNameMap.get(studyIdString);
    return variant.getStudy(mappedStudy);
}
/**
 * Legacy accessor that forwards to {@code getStudy(studyId)}.
 *
 * @param studyId identifier forwarded unchanged to {@code getStudy}
 * @return the result of {@code getStudy(studyId)}
 * @deprecated call {@code getStudy(studyId)} directly
 */
@Deprecated
public StudyEntry getSourceEntry(String studyId) {
    StudyEntry entry = getStudy(studyId);
    return entry;
}
/**
 * Legacy accessor that forwards to {@code getStudy(studyId)}.
 *
 * @param fileId  not used by this method; only {@code studyId} takes part in the lookup
 * @param studyId identifier forwarded unchanged to {@code getStudy}
 * @return the result of {@code getStudy(studyId)}
 * @deprecated call {@code getStudy(studyId)} directly
 */
@Deprecated
public StudyEntry getSourceEntry(String fileId, String studyId) {
    StudyEntry entry = getStudy(studyId);
    return entry;
}
/**
 * Returns the ordered sample names of the requested study.
 *
 * @param studyId identifier passed to {@code getStudy}
 * @return {@code getOrderedSamplesName()} of the study, or {@code null} when
 *         {@code getStudy(studyId)} yields no study entry
 */
public List<String> getSampleNames(String studyId) {
    StudyEntry entry = getStudy(studyId);
    return entry != null ? entry.getOrderedSamplesName() : null;
}
// Extracts the study entry named by the captured `study` value from the variant under inspection.
// NOTE(review): looks like a feature-extraction hook of a matcher — confirm against the enclosing declaration.
@Override
protected StudyEntry featureValueOf(Variant actual) {
    StudyEntry extracted = actual.getStudy(study);
    return extracted;
}
};
/**
 * Picks the study entry to work with from a variant.
 *
 * @param variant variant holding the studies
 * @param studyId study to select; when {@code null} the variant must hold exactly one study
 * @return the study selected by {@code studyId} (as returned by {@code variant.getStudy}),
 *         or the single study of the variant when {@code studyId} is {@code null}
 * @throws IllegalArgumentException if {@code studyId} is {@code null} and the variant does not
 *                                  contain exactly one study
 */
private static StudyEntry getStudyEntry(Variant variant, String studyId) {
    if (studyId != null) {
        return variant.getStudy(studyId);
    }
    // No explicit study requested: only unambiguous when there is exactly one.
    if (variant.getStudies().size() != 1) {
        throw new IllegalArgumentException("Required one Study per variant. Found "
                + variant.getStudies().size() + " studies instead");
    }
    return variant.getStudies().get(0);
}
public void updateFileStats(List<Variant> variants) { int incompleteVariantStats = 0; for (Variant v : variants) { StudyEntry studyEntry = v.getStudy(studyId); if (studyEntry == null) { // The variant is not contained in this file continue; } try { VariantStats stats = studyEntry.getStats(StudyEntry.DEFAULT_COHORT); if (stats != null) { fileStats.update(stats); } } catch (NullPointerException e) { e.printStackTrace(); incompleteVariantStats++; } } if (incompleteVariantStats != 0) { Logger logger = LoggerFactory.getLogger(VariantSourceStats.class); logger.warn("{} VariantStats have needed members as null", incompleteVariantStats); } }
/**
 * Resolves the study entry to use for a variant.
 *
 * @param variant variant to read from
 * @return the study matching {@code getStudyId()} when {@code hasStudyId()} is true,
 *         otherwise the first study of the variant
 * @throws IllegalStateException if a study id is set but the variant has no such study
 */
StudyEntry getStudy(Variant variant) {
    if (!hasStudyId()) {
        return variant.getStudies().get(0);
    }
    StudyEntry found = variant.getStudy(getStudyId());
    if (found == null) {
        throw new IllegalStateException("No study found for " + getStudyId());
    }
    return found;
}
/**
 * Computes the start/end range of a variant, widened where needed.
 * <p>
 * The start is moved one position back when the variant's reference or alternate is blank.
 * Every secondary alternate of the configured study is then folded in: the range grows to cover
 * its start and end, and its start minus one when that alternate's own alternate or reference
 * is blank.
 *
 * @param variant {@link Variant} object.
 * @return pair of (start, end) after the adjustments above; equal to the variant's own
 *         start/end when no adjustment applies
 */
protected Pair<Integer, Integer> adjustedVariantStart(Variant variant) {
    Integer adjustedStart = variant.getStart();
    Integer adjustedEnd = variant.getEnd();

    boolean mainAlleleEmpty = StringUtils.isBlank(variant.getReference())
            || StringUtils.isBlank(variant.getAlternate());
    if (mainAlleleEmpty) {
        adjustedStart = adjustedStart - 1;
    }

    for (AlternateCoordinate secAlt
            : variant.getStudy(this.studyConfiguration.getStudyName()).getSecondaryAlternates()) {
        adjustedStart = Math.min(adjustedStart, secAlt.getStart());
        adjustedEnd = Math.max(adjustedEnd, secAlt.getEnd());
        boolean secAltAlleleEmpty = StringUtils.isBlank(secAlt.getAlternate())
                || StringUtils.isBlank(secAlt.getReference());
        if (secAltAlleleEmpty) {
            adjustedStart = Math.min(adjustedStart, secAlt.getStart() - 1);
        }
    }
    return new ImmutablePair<>(adjustedStart, adjustedEnd);
}
/**
 * Configures study "1" of the variant with a single sample "1" (position 0) whose data is
 * the given genotype and filter, under the format
 * [{@code GENOTYPE_KEY}, {@code GENOTYPE_FILTER_KEY}].
 *
 * @param var    variant to modify in place; must already contain study "1"
 * @param gt     value stored for {@code GENOTYPE_KEY}
 * @param filter value stored for {@code GENOTYPE_FILTER_KEY}
 * @return the same {@code var} instance
 */
public static Variant addGTAndFilter(Variant var, String gt, String filter) {
    StudyEntry study = var.getStudy("1");
    study.setSamplesPosition(Collections.singletonMap("1", 0));
    study.setFormat(Arrays.asList(GENOTYPE_KEY, GENOTYPE_FILTER_KEY));
    study.setSamplesData(
            Collections.singletonList(
                    Arrays.asList(gt, filter)));
    return var;
}
/**
 * Puts an attribute on file "1" of study "1" of the variant.
 *
 * @param var   variant to modify in place; must already contain study "1" with file "1"
 * @param key   attribute key
 * @param value attribute value
 * @return the same {@code var} instance
 */
public static Variant addAttribute(Variant var, String key, String value) {
    var.getStudy("1")
            .getFile("1")
            .getAttributes()
            .put(key, value);
    return var;
}
/**
 * Builds the allele strings of a variant via {@code buildAllele}: first the reference, then the
 * alternate, then one entry per secondary alternate of the configured study, in list order.
 *
 * @param variant       variant providing chromosome, coordinates and alleles
 * @param adjustedRange range forwarded unchanged to every {@code buildAllele} call
 * @return list of size {@code 2 + number of secondary alternates}
 */
public List<String> buildAlleles(Variant variant, Pair<Integer, Integer> adjustedRange) {
    String referenceAllele = variant.getReference();
    String alternateAllele = variant.getAlternate();
    List<AlternateCoordinate> secondaryAlternates =
            variant.getStudy(this.studyConfiguration.getStudyName()).getSecondaryAlternates();

    List<String> result = new ArrayList<>(secondaryAlternates.size() + 2);
    Integer variantStart = variant.getStart();
    Integer variantEnd = variant.getEnd();
    String chromosome = variant.getChromosome();

    result.add(buildAllele(chromosome, variantStart, variantEnd, referenceAllele, adjustedRange));
    result.add(buildAllele(chromosome, variantStart, variantEnd, alternateAllele, adjustedRange));
    for (AlternateCoordinate secAlt : secondaryAlternates) {
        result.add(buildAllele(chromosome, secAlt.getStart(), secAlt.getEnd(), secAlt.getAlternate(), adjustedRange));
    }
    return result;
}
/**
 * Adds a Hardy-Weinberg score to the variant's study entry, computed from the genotype counts
 * of the cohort {@code cohortName}.
 * <p>
 * The variant is returned untouched when it has no entry for {@code studyId} or that entry has
 * no stats for the cohort. Phased and unphased genotype counts are summed per class
 * (hom-ref, het, hom-alt); a missing count is treated as 0.
 *
 * @param variant variant to score
 * @return the same variant instance
 * @throws Exception declared by the signature
 */
public Variant apply(Variant variant) throws Exception {
    StudyEntry study = variant.getStudy(studyId);
    VariantStats stats = (study == null) ? null : study.getStats(cohortName);
    if (stats == null) {
        return variant;
    }

    int homRefCount = stats.getGenotypeCount().getOrDefault(HOM_REF, 0)
            + stats.getGenotypeCount().getOrDefault(HOM_REF_PHASED, 0);
    int hetCount = stats.getGenotypeCount().getOrDefault(HET, 0)
            + stats.getGenotypeCount().getOrDefault(HET_PHASED, 0);
    int homAltCount = stats.getGenotypeCount().getOrDefault(HOM_ALT, 0)
            + stats.getGenotypeCount().getOrDefault(HOM_ALT_PHASED, 0);

    VariantHardyWeinbergStats hardyWeinberg = new VariantHardyWeinbergStats(homRefCount, hetCount, homAltCount);
    hardyWeinberg.calculate();

    study.addScore(new VariantScore(SCORE_ID, cohortName, null, hardyWeinberg.getChi2(), hardyWeinberg.getpValue()));
    return variant;
}
}
public static Variant addGT(Variant var, String gt) { StudyEntry se = var.getStudy("1"); se.addFormat("GT"); se.getFormatPositions(); se.setSamplesPosition(Collections.singletonMap("1", 0)); // se.setFormat(Collections.singletonList("GT")); se.setSamplesData(Collections.singletonList(Collections.singletonList(gt))); return var; }
/**
 * Asserts that, for every sample of the given file, the variant holds the expected value in
 * the given sample-data field.
 * <p>
 * First asserts that {@code fileId} is one of the study's file ids. Then, for each sample id
 * registered for that file, asserts that the variant's study contains the sample name and that
 * its {@code field} value equals {@code valueProvider.apply(sampleId)}.
 *
 * @param variant            variant under test
 * @param studyConfiguration configuration providing study name, file ids and sample ids
 * @param fileId             file whose samples are checked
 * @param valueProvider      maps a sample id to the expected field value
 * @param field              sample-data field to compare
 */
public void checkSampleData(Variant variant, StudyConfiguration studyConfiguration, Integer fileId,
                            Function<Integer, String> valueProvider, String field) {
    assertTrue(studyConfiguration.getFileIds().values().contains(fileId));

    for (Integer sampleId : studyConfiguration.getSamplesInFiles().get(fileId)) {
        String sampleName = studyConfiguration.getSampleIds().inverse().get(sampleId);
        StudyEntry study = variant.getStudy(studyConfiguration.getStudyName());
        assertTrue(study.getSamplesName().contains(sampleName));

        String failureMessage = "Variant=" + variant
                + " StudyId=" + studyConfiguration.getStudyId()
                + " FileId=" + fileId
                + " Field=" + field
                + " Sample=" + sampleName + " (" + sampleId + ")\n"
                + variant.toJson();
        assertEquals(failureMessage, valueProvider.apply(sampleId), study.getSampleData(sampleName, field));
    }
}
private void updateVariantSetStats(Variant variant) { StudyEntry study = variant.getStudy(studyId); if (study == null) { return;
@Test public void indexWithOtherFieldsNoGT() throws Exception { //GL:DP:GU:TU:AU:CU StudyConfiguration studyConfiguration = newStudyConfiguration(); StoragePipelineResult etlResult = runDefaultETL(getResourceUri("variant-test-somatic.vcf"), getVariantStorageEngine(), studyConfiguration, new ObjectMap(VariantStorageEngine.Options.EXTRA_GENOTYPE_FIELDS.key(), Arrays.asList("GL", "DP", "AU", "CU", "GU", "TU")) // .append(VariantStorageEngine.Options.FILE_ID.key(), 2) .append(VariantStorageEngine.Options.ANNOTATE.key(), false) .append(VariantStorageEngine.Options.CALCULATE_STATS.key(), false) ); VariantDBIterator iterator = getVariantStorageEngine().getDBAdaptor().iterator(new Query(VariantQueryParam.UNKNOWN_GENOTYPE.key(), "./."), new QueryOptions()); while (iterator.hasNext()) { Variant variant = iterator.next(); assertThat(variant.getStudy(STUDY_NAME).getSampleData("SAMPLE_1", "GT"), anyOf(is("./."), is("."))); assertNotNull(variant.getStudy(STUDY_NAME).getSampleData("SAMPLE_1", "DP")); assertNotNull(variant.getStudy(STUDY_NAME).getSampleData("SAMPLE_1", "GL")); assertNotNull(variant.getStudy(STUDY_NAME).getSampleData("SAMPLE_1", "AU")); assertNotNull(variant.getStudy(STUDY_NAME).getSampleData("SAMPLE_1", "CU")); assertNotNull(variant.getStudy(STUDY_NAME).getSampleData("SAMPLE_1", "GU")); assertNotNull(variant.getStudy(STUDY_NAME).getSampleData("SAMPLE_1", "TU")); } }
/**
 * Queries with INCLUDE_STUDY "S_1" and INCLUDE_FILE {@code file12877} and checks that:
 * the number of results equals {@code dbAdaptor.count(null)}; each variant has at most one
 * study; when study "S_1" is present it has at most one file, and if it has exactly one,
 * {@code file12877} is retrievable; and no result carries studies S_2, S_3 or S_4.
 */
@Test
public void testIncludeFiles() throws Exception {
    query = new Query()
            .append(VariantQueryParam.INCLUDE_STUDY.key(), "S_1")
            .append(VariantQueryParam.INCLUDE_FILE.key(), file12877);
    queryResult = query(query, options);

    int expectedResults = dbAdaptor.count(null).first().intValue();
    assertEquals(expectedResults, queryResult.getNumResults());

    for (Variant variant : queryResult.getResult()) {
        assertTrue(variant.getStudies().size() <= 1);

        StudyEntry includedStudy = variant.getStudy("S_1");
        if (includedStudy != null) {
            int numFiles = includedStudy.getFiles().size();
            assertTrue(numFiles <= 1);
            if (numFiles == 1) {
                assertNotNull(includedStudy.getFile(file12877));
            }
        }

        assertTrue(variant.getStudies().size() <= 1);
    }

    assertThat(queryResult, everyResult(allOf(
            not(withStudy("S_2")),
            not(withStudy("S_3")),
            not(withStudy("S_4")))));
}
@Test public void removeSingleFileTest() throws Exception { StudyConfiguration studyConfiguration = VariantStorageBaseTest.newStudyConfiguration(); System.out.println("studyConfiguration = " + studyConfiguration); String studyName = studyConfiguration.getStudyName(); Map<String, Object> options = Collections.singletonMap(HadoopVariantStorageEngine.VARIANT_TABLE_INDEXES_SKIP, true); loadFile("s1.genome.vcf", studyConfiguration, options); Map<String, Variant> variants = buildVariantsIdx(); assertFalse(variants.containsKey("1:10014:A:G")); assertTrue(variants.containsKey("1:10013:T:C")); assertEquals("0/1", variants.get("1:10013:T:C").getStudy(studyName).getSampleData("s1", "GT")); VariantHadoopDBAdaptor dbAdaptor = getVariantStorageEngine().getDBAdaptor(); VariantHbaseTestUtils.printVariants(getVariantStorageEngine().getDBAdaptor(), newOutputUri()); // delete removeFile("s1.genome.vcf", studyConfiguration, options); VariantHbaseTestUtils.printVariants(getVariantStorageEngine().getDBAdaptor(), newOutputUri()); checkSampleIndexTable(studyConfiguration, dbAdaptor, "s1.genome.vcf"); variants = buildVariantsIdx(); assertEquals("Expected none variants", 0, variants.size()); assertEquals("Expected none indexed files", 0, studyConfiguration.getIndexedFiles().size()); }
/**
 * Loads {@code smallInputUri} with EXTRA_GENOTYPE_FIELDS set to {@code VariantQueryUtils.NONE}
 * (avro transform format, annotation and stats calculation disabled) and checks that every
 * stored variant reports "GT" as its format string for {@code STUDY_NAME}.
 */
@Test
public void indexWithoutOtherFields() throws Exception {
    StudyConfiguration studyConfiguration = newStudyConfiguration();

    ObjectMap etlOptions = new ObjectMap(VariantStorageEngine.Options.EXTRA_GENOTYPE_FIELDS.key(), VariantQueryUtils.NONE)
            .append(VariantStorageEngine.Options.TRANSFORM_FORMAT.key(), "avro")
            .append(VariantStorageEngine.Options.ANNOTATE.key(), false)
            .append(VariantStorageEngine.Options.CALCULATE_STATS.key(), false);
    runDefaultETL(smallInputUri, getVariantStorageEngine(), studyConfiguration, etlOptions);

    for (Variant variant : variantStorageEngine.getDBAdaptor()) {
        String actualFormat = variant.getStudy(STUDY_NAME).getFormatAsString();
        assertEquals("GT", actualFormat);
    }
}