private List<String> createCohortsIfNeeded(String studyId, Set<String> cohortNames, String sessionId) throws CatalogException { List<String> cohorts = new ArrayList<>(); // Silent query, so it does not fail for missing cohorts Set<String> catalogCohorts = catalogManager.getCohortManager().get(studyId, new ArrayList<>(cohortNames), new Query(), new QueryOptions(QueryOptions.INCLUDE, "name,id"), true, sessionId) .stream() .map(QueryResult::first) .filter(Objects::nonNull) .map(Cohort::getId) .collect(Collectors.toSet()); for (String cohortName : cohortNames) { if (!catalogCohorts.contains(cohortName)) { QueryResult<Cohort> cohort = catalogManager.getCohortManager().create(studyId, cohortName, Study.Type.COLLECTION, "", Collections.emptyList(), null, null, sessionId); logger.info("Creating cohort {}", cohortName); cohorts.add(cohort.first().getId()); } else { logger.debug("cohort {} was already created", cohortName); cohorts.add(cohortName); } } return cohorts; }
private Cohort createDefaultCohortIfNeeded(Study study, String sessionId) throws CatalogException { Query query = new Query(CohortDBAdaptor.QueryParams.ID.key(), StudyEntry.DEFAULT_COHORT); Cohort cohort = catalogManager.getCohortManager().get(study.getFqn(), query, null, sessionId).first(); if (cohort == null) { try { return createDefaultCohort(study, sessionId); } catch (CatalogDBException e) { if (e.getMessage().contains("already exists")) { cohort = catalogManager.getCohortManager().get(study.getFqn(), query, null, sessionId).first(); if (cohort == null) { throw e; } else { // Do not fail when concurrent cohort creation. return cohort; } } else { throw e; } } } else { return cohort; } }
/**
 * Ensures that every cohort named in the tag-mapping properties file exists in the study.
 * <p>
 * The cohort names are obtained from {@code VariantAggregatedStatsCalculator.getCohorts} applied
 * to the loaded properties. Cohorts not yet present in the study are created with an empty
 * sample list and type {@code Study.Type.COLLECTION}; cohorts already present are reused and a
 * warning is logged.
 *
 * @param sessionId      session token used for every catalog call
 * @param studyId        study in which the cohorts are looked up and created
 * @param tagmapPath     path of the properties file holding the tag mapping
 * @param catalogManager catalog entry point
 * @param logger         logger receiving the "already created" warnings
 * @return one cohort per name found in the tag mapping, in the order the names are returned
 * @throws IOException      if the properties file cannot be opened or read
 * @throws CatalogException if a catalog call fails
 */
public static List<Cohort> createCohorts(String sessionId, String studyId, String tagmapPath, CatalogManager catalogManager,
                                         Logger logger) throws IOException, CatalogException {
    Properties tagmap = new Properties();
    // Properties.load leaves the stream open, so it is closed here via try-with-resources.
    try (FileInputStream tagmapStream = new FileInputStream(tagmapPath)) {
        tagmap.load(tagmapStream);
    }

    Map<String, Cohort> cohorts = catalogManager.getCohortManager().get(studyId, new Query(), null, sessionId)
            .getResult().stream().collect(Collectors.toMap(Cohort::getId, c -> c));

    List<Cohort> queryResults = new ArrayList<>();
    for (String cohortName : VariantAggregatedStatsCalculator.getCohorts(tagmap)) {
        // Collectors.toMap never stores null values, so a null lookup means "not in catalog".
        Cohort existing = cohorts.get(cohortName);
        if (existing == null) {
            QueryResult<Cohort> cohort = catalogManager.getCohortManager().create(studyId, new Cohort()
                    .setId(cohortName)
                    .setName(cohortName)
                    .setSamples(Collections.emptyList())
                    .setType(Study.Type.COLLECTION), null, sessionId);
            queryResults.add(cohort.first());
        } else {
            logger.warn("cohort {} was already created", cohortName);
            queryResults.add(existing);
        }
    }
    return queryResults;
}
public void calculateAggregatedStats(QueryOptions options) throws Exception { // coh0 = catalogManager.createCohort(studyId, "ALL", Cohort.Type.COLLECTION, "", file.getSampleIds(), null, sessionId).first().getId(); String cohId = catalogManager.getCohortManager().get(studyId, (Query) null, null, sessionId).first().getId(); calculateStats(cohId, options); checkCalculatedAggregatedStats(Collections.singleton(DEFAULT_COHORT), dbName); }
@Test public void testCalculateAggregatedExacStats() throws Exception { beforeAggregated("exachead.vcf.gz", Aggregation.EXAC); String tagMap = getResourceUri("exac-tag-mapping.properties").getPath(); List<String> cohortIds = createCohorts(sessionId, studyId, tagMap, catalogManager, logger) .stream().map(Cohort::getId).map(Object::toString).collect(Collectors.toList()); QueryOptions options = new QueryOptions(VariantStorageEngine.Options.AGGREGATION_MAPPING_PROPERTIES.key(), tagMap); calculateStats(options, cohortIds); List<Cohort> cohorts = catalogManager.getCohortManager().get(studyId, (Query) null, null, sessionId).getResult(); Set<String> cohortNames = cohorts .stream() .map(Cohort::getId) .collect(Collectors.toSet()); assertEquals(8, cohortNames.size()); for (Cohort cohort : cohorts) { assertEquals(Cohort.CohortStatus.READY, cohort.getStatus().getName()); } // checkCalculatedAggregatedStats(cohorts, dbName); }
/**
 * Verifies the stats stored for every variant of the database against the expected cohorts.
 * <p>
 * For each study entry of each variant it asserts that the number of stats entries equals the
 * number of expected cohorts, that every stats key is an expected cohort, and (for non-null
 * expected cohorts) that the sum of the genotype counts equals the cohort's sample count.
 * Finally each expected cohort is fetched again from the catalog and must be in READY status.
 *
 * @param studyId        study the cohorts belong to
 * @param cohorts        expected cohorts keyed by the name used in the variant stats
 * @param catalogManager catalog entry point used to re-read the cohorts
 * @param dbName         name of the variant database to iterate
 * @param sessionId      session token used for the catalog calls
 * @throws Exception if the storage engine or the catalog fails
 */
public static void checkCalculatedStats(String studyId, Map<String, Cohort> cohorts, CatalogManager catalogManager,
                                        String dbName, String sessionId) throws Exception {
    VariantDBAdaptor variants = StorageEngineFactory.get().getVariantStorageEngine(null, dbName).getDBAdaptor();

    for (Variant variant : variants) {
        for (StudyEntry studyEntry : variant.getStudies()) {
            assertEquals("In variant " + variant.toString(), cohorts.size(), studyEntry.getStats().size());

            for (Map.Entry<String, VariantStats> statsEntry : studyEntry.getStats().entrySet()) {
                String cohortKey = statsEntry.getKey();
                assertTrue("In variant " + variant.toString(), cohorts.containsKey(cohortKey));

                if (cohorts.get(cohortKey) == null) {
                    // A null value marks a cohort whose sample count is not checked.
                    continue;
                }
                assertEquals("Variant: " + variant.toString() + " does not have the correct number of samples in cohort '"
                                + cohortKey + "'. jsonVariant: " + variant.toJson(),
                        cohorts.get(cohortKey).getSamples().size(),
                        statsEntry.getValue().getGenotypeCount().values().stream().reduce(Integer::sum).orElse(0).intValue());
            }
        }
    }

    for (Cohort expected : cohorts.values()) {
        Cohort stored = catalogManager.getCohortManager().get(studyId, expected.getId(), null, sessionId).first();
        assertEquals(Cohort.CohortStatus.READY, stored.getStatus().getName());
    }
}
@Test public void testCalculateAggregatedExacStatsExplicitCohorts() throws Exception { beforeAggregated("exachead.vcf.gz", Aggregation.EXAC); String tagMap = getResourceUri("exac-tag-mapping.properties").getPath(); QueryOptions options = new QueryOptions(VariantStorageEngine.Options.AGGREGATION_MAPPING_PROPERTIES.key(), tagMap); calculateStats(options, Arrays.asList("AFR", "ALL", "AMR", "EAS", "FIN", "NFE", "OTH", "SAS")); List<Cohort> cohorts = catalogManager.getCohortManager().get(studyId, (Query) null, null, sessionId).getResult(); Set<String> cohortNames = cohorts .stream() .map(Cohort::getId) .collect(Collectors.toSet()); assertEquals(8, cohortNames.size()); for (Cohort cohort : cohorts) { assertEquals(Cohort.CohortStatus.READY, cohort.getStatus().getName()); } // checkCalculatedAggregatedStats(cohorts, dbName); }
@Test public void testCalculateStatsOneByOne() throws Exception { before(); Map<String, Cohort> cohorts = new HashMap<>(); calculateStats(coh[0]); cohorts.put(coh[0], catalogManager.getCohortManager().get(studyId, coh[0], null, sessionId).first()); // cohorts.put("all", null); checkCalculatedStats(cohorts); // Job job = variantStorage.calculateStats(outputId, Collections.singletonList(coh1), sessionId, new QueryOptions(ExecutorManager.EXECUTE, true)).first(); // assertEquals(Status.READY, job.getStatus().getName()); cohorts.put(coh[1], catalogManager.getCohortManager().get(studyId, coh[1], null, sessionId).first()); calculateStats(coh[1]); checkCalculatedStats(cohorts); calculateStats(coh[2]); cohorts.put(coh[2], catalogManager.getCohortManager().get(studyId, coh[2], null, sessionId).first()); checkCalculatedStats(cohorts); calculateStats(coh[3]); cohorts.put(coh[3], catalogManager.getCohortManager().get(studyId, coh[3], null, sessionId).first()); checkCalculatedStats(cohorts); calculateStats(coh[4]); cohorts.put(coh[4], catalogManager.getCohortManager().get(studyId, coh[4], null, sessionId).first()); checkCalculatedStats(cohorts); }
/**
 * Forces the mocked storage engine to fail while calculating stats and expects the affected
 * cohort to end in INVALID status; afterwards a fresh mock is installed and the calculation is
 * run again for the same cohort.
 */
@Test
public void testCalculateInvalidStats() throws Exception {
    before();
    calculateStats(coh[0]);

    String errorMessage = "Error";
    DummyVariantStorageEngine failingEngine = mockVariantStorageManager();
    // Both calculateStats overloads (List and Map based) are stubbed to throw.
    doThrow(new StorageEngineException(errorMessage)).when(failingEngine).calculateStats(any(), any(List.class), any());
    doThrow(new StorageEngineException(errorMessage)).when(failingEngine).calculateStats(any(), any(Map.class), any());

    try {
        calculateStats(coh[1]);
        fail();
    } catch (StorageEngineException e) {
        assertEquals(errorMessage, e.getCause().getMessage());
    }

    Cohort failedCohort = catalogManager.getCohortManager().get(studyId, coh[1], null, sessionId).first();
    assertEquals(Cohort.CohortStatus.INVALID, failedCohort.getStatus().getName());

    // Request a new mock (without the throwing stubs configured above) and retry.
    mockVariantStorageManager();
    calculateStats(coh[1]);
}
/**
 * Calculates stats for groups of cohorts. A group containing a non-existing cohort name must
 * fail with a CatalogException and leave the other cohorts of that group in NONE status; the
 * same group with valid names must then succeed.
 */
@Test
public void testCalculateStatsGroups() throws Exception {
    before();
    Map<String, Cohort> cohorts = new HashMap<>();

    calculateStats(new QueryOptions(), coh[0], coh[1], coh[2]);
    for (int i = 0; i < 3; i++) {
        cohorts.put(coh[i], catalogManager.getCohortManager().get(studyId, coh[i], null, sessionId).first());
    }
    checkCalculatedStats(cohorts);

    String missingCohort = "-" + coh[4];
    try {
        calculateStats(new QueryOptions(), all, coh[3], missingCohort);
        fail();
    } catch (CatalogException e) {
        logger.info("received expected exception. this is OK, there is no cohort " + missingCohort + '\n');
    }

    // The failed request must not have changed the status of any of these cohorts.
    for (String untouched : new String[]{"ALL", coh[3], coh[4]}) {
        assertEquals(Cohort.CohortStatus.NONE,
                catalogManager.getCohortManager().get(studyId, untouched, null, sessionId).first().getStatus().getName());
    }

    calculateStats(new QueryOptions(), all, coh[3], coh[4]);
    for (String calculated : new String[]{DEFAULT_COHORT, coh[3], coh[4]}) {
        cohorts.put(calculated, catalogManager.getCohortManager().get(studyId, calculated, null, sessionId).first());
    }
    checkCalculatedStats(cohorts);
}
@Test public void testCalculateAggregatedExacStatsWithoutCohorts() throws Exception { beforeAggregated("exachead.vcf.gz", Aggregation.EXAC); String tagMap = getResourceUri("exac-tag-mapping.properties").getPath(); QueryOptions options = new QueryOptions(VariantStorageEngine.Options.AGGREGATION_MAPPING_PROPERTIES.key(), tagMap); calculateStats(options); List<Cohort> cohorts = catalogManager.getCohortManager().get(studyId, (Query) null, null, sessionId).getResult(); Set<String> cohortNames = cohorts .stream() .map(Cohort::getId) .collect(Collectors.toSet()); assertEquals(8, cohortNames.size()); for (Cohort cohort : cohorts) { assertEquals(Cohort.CohortStatus.READY, cohort.getStatus().getName()); } // checkCalculatedAggregatedStats(cohorts, dbName); }
/**
 * Removes the given files from the variant storage and verifies the resulting catalog state:
 * every file is reported as removed, the default cohort is INVALID or NONE and contains exactly
 * the samples of the files still indexed as READY, and each removed file is back in
 * TRANSFORMED index status.
 *
 * @param files    files to remove; the study is taken from the first one
 * @param options  not used by this method
 * @param outputId not used by this method
 * @throws Exception if the removal or any catalog call fails
 */
private void removeFile(List<File> files, QueryOptions options, String outputId) throws Exception {
    List<String> fileIds = files.stream().map(File::getId).collect(Collectors.toList());
    String studyId = catalogManager.getFileManager().getStudy(files.get(0), sessionId).getFqn();

    List<File> removedFiles = variantManager.removeFile(fileIds, studyId, sessionId, new QueryOptions());
    assertEquals(files.size(), removedFiles.size());

    Query defaultCohortQuery = new Query(CohortDBAdaptor.QueryParams.ID.key(), StudyEntry.DEFAULT_COHORT);
    Cohort all = catalogManager.getCohortManager().get(studyId, defaultCohortQuery, null, sessionId).first();
    Set<Long> allSampleIds = all.getSamples()
            .stream()
            .map(Sample::getUid)
            .collect(Collectors.toSet());
    assertThat(all.getStatus().getName(), anyOf(is(Cohort.CohortStatus.INVALID), is(Cohort.CohortStatus.NONE)));

    // Samples of every file that is still indexed.
    Query readyFilesQuery = new Query(FileDBAdaptor.QueryParams.INDEX_STATUS_NAME.key(), FileIndex.IndexStatus.READY);
    Set<Long> loadedSamples = catalogManager.getFileManager().get(studyId, readyFilesQuery, null, sessionId)
            .getResult()
            .stream()
            .flatMap(indexed -> indexed.getSamples().stream())
            .map(Sample::getUid)
            .collect(Collectors.toSet());
    assertEquals(loadedSamples, allSampleIds);

    for (File removed : removedFiles) {
        assertEquals(FileIndex.IndexStatus.TRANSFORMED, removed.getIndex().getStatus().getName());
    }
}
/**
 * Fetches the cohort whose id is {@code DEFAULT_COHORT} from the given study.
 *
 * @param studyId study to query
 * @return the first result of the query
 * @throws CatalogException if the catalog query fails
 */
protected Cohort getDefaultCohort(String studyId) throws CatalogException {
    Query byDefaultId = new Query(CohortDBAdaptor.QueryParams.ID.key(), DEFAULT_COHORT);
    return catalogManager.getCohortManager()
            .get(studyId, byDefaultId, new QueryOptions(), sessionId)
            .first();
}
/**
 * Sets the status of the study's default cohort and reports the status it had before.
 *
 * @param study     study owning the default cohort
 * @param status    status name to set
 * @param sessionId session token used for the catalog calls
 * @return the status name the default cohort had before the update
 * @throws CatalogException if a catalog call fails
 */
private String updateDefaultCohortStatus(Study study, String status, String sessionId) throws CatalogException {
    Query byDefaultId = new Query(CohortDBAdaptor.QueryParams.ID.key(), StudyEntry.DEFAULT_COHORT);
    Cohort defaultCohort = catalogManager.getCohortManager()
            .get(study.getFqn(), byDefaultId, new QueryOptions(), sessionId)
            .first();

    // Capture the old status before it is overwritten.
    String previous = defaultCohort.getStatus().getName();
    catalogManager.getCohortManager().setStatus(study.getFqn(), defaultCohort.getId(), status, null, sessionId);
    return previous;
}
// Fragment of a test body: the enclosing method header is not visible here.
// NOTE(review): several assertions below expect two different statuses for coh[0] back to back
// (NONE then READY, INVALID then READY) with nothing in between - presumably the statements that
// change the status (e.g. a stats calculation) were elided from this excerpt; confirm.

// coh[0] is expected to start in NONE status ...
assertEquals(Cohort.CohortStatus.NONE, catalogManager.getCohortManager().get(studyId, coh[0], null, sessionId).first().getStatus().getName());
// ... and to be READY afterwards.
assertEquals(Cohort.CohortStatus.READY, catalogManager.getCohortManager().get(studyId, coh[0], null, sessionId).first().getStatus().getName());
// Record the current catalog entry under the key "coh0" and verify the stored stats.
cohorts.put("coh0", catalogManager.getCohortManager().get(studyId, coh[0], null, sessionId).first());
checkCalculatedStats(cohorts);
// The verification must leave the cohort in READY status.
assertEquals(Cohort.CohortStatus.READY, catalogManager.getCohortManager().get(studyId, coh[0], null, sessionId).first().getStatus().getName());
// Build a new sample list from the cohort's current samples: skip the first 10, keep at most 100.
List<String> newCohort = catalogManager.getCohortManager().get(studyId, coh[0], null, sessionId).first().getSamples().stream()
        .map(Sample::getId)
        .skip(10).limit(100)
        .collect(Collectors.toList());
// Replace the cohort's samples; the cohort is then expected to be INVALID.
catalogManager.getCohortManager().update(studyId, coh[0], new ObjectMap("samples", newCohort), new QueryOptions(), sessionId);
assertEquals(Cohort.CohortStatus.INVALID, catalogManager.getCohortManager().get(studyId, coh[0], null, sessionId).first().getStatus().getName());
assertEquals(Cohort.CohortStatus.READY, catalogManager.getCohortManager().get(studyId, coh[0], null, sessionId).first().getStatus().getName());
// Refresh the recorded entry with the updated cohort and verify the stats again.
cohorts.put("coh0", catalogManager.getCohortManager().get(studyId, coh[0], null, sessionId).first());
checkCalculatedStats(cohorts);
/**
 * Removes a whole study from the variant storage and verifies that no file of the study is
 * still indexed as READY and that the default cohort has no samples left.
 *
 * @param study   study to remove; its {@code toString()} value is used as the study identifier
 * @param options not used by this method
 * @throws Exception if the removal or any catalog call fails
 */
private void removeStudy(Object study, QueryOptions options) throws Exception {
    String studyStr = study.toString();
    variantManager.removeStudy(studyStr, sessionId, new QueryOptions());

    Query readyFiles = new Query(FileDBAdaptor.QueryParams.INDEX_STATUS_NAME.key(), FileIndex.IndexStatus.READY);
    assertEquals(0L, catalogManager.getFileManager().count(study.toString(), readyFiles, sessionId).getNumTotalResults());

    // NOTE(review): this lookup uses the studyId field, not the study parameter - confirm both
    // always denote the same study.
    Query defaultCohortQuery = new Query(CohortDBAdaptor.QueryParams.ID.key(), StudyEntry.DEFAULT_COHORT);
    Cohort all = catalogManager.getCohortManager().get(studyId, defaultCohortQuery, null, sessionId).first();
    assertTrue(all.getSamples().isEmpty());
}
/**
 * A cohort left in CALCULATING status must be rejected by a plain stats calculation (with the
 * exception produced by {@code VariantStatsStorageOperation.unableToCalculateCohortCalculating})
 * and accepted when the RESUME option is set.
 */
@Test
public void testResumeCalculateStats() throws Exception {
    before();
    calculateStats(coh[0]);

    // Put coh[1] into CALCULATING status by hand.
    catalogManager.getCohortManager().setStatus(studyId, coh[1], Cohort.CohortStatus.CALCULATING, "", sessionId);
    Cohort calculatingCohort = catalogManager.getCohortManager().get(studyId, coh[1], null, sessionId).first();
    Exception expected = VariantStatsStorageOperation.unableToCalculateCohortCalculating(calculatingCohort);

    try {
        calculateStats(coh[1]);
        fail();
    } catch (Exception e) {
        assertThat(e, instanceOf(expected.getClass()));
        assertThat(e, hasMessage(is(expected.getMessage())));
    }

    QueryOptions resumeOptions = new QueryOptions(VariantStorageEngine.Options.RESUME.key(), true);
    calculateStats(coh[1], resumeOptions);
}
/**
 * Test fixture: registers the 1000 Genomes chr22 batch file, splits its samples into
 * {@code coh.length} consecutive, equally sized cohorts named {@code coh0}, {@code coh1}, ...,
 * indexes the file without annotation and stores the id of the default cohort in {@code all}.
 * <p>
 * The chunk size uses integer division, so any remainder samples at the end of the list are
 * not assigned to a cohort.
 *
 * @throws Exception if file creation, cohort creation or indexing fails
 */
public void before() throws Exception {
    File file = opencga.createFile(studyId,
            "1000g_batches/1-500.filtered.10k.chr22.phase3_shapeit2_mvncall_integrated_v5.20130502.genotypes.vcf.gz", sessionId);

    for (int i = 0; i < coh.length; i++) {
        int chunkSize = file.getSamples().size() / coh.length;
        int from = chunkSize * i;
        int to = chunkSize * (i + 1);
        coh[i] = catalogManager.getCohortManager()
                .create(studyId, "coh" + i, Study.Type.CONTROL_SET, "", file.getSamples().subList(from, to), null, null,
                        sessionId)
                .first()
                .getId();
    }

    QueryOptions indexOptions = new QueryOptions(VariantStorageEngine.Options.ANNOTATE.key(), false);
    indexOptions.putIfNotNull(StorageOperation.CATALOG_PATH, outputId);
    variantManager.index(studyId, file.getId(), createTmpOutdir(file), indexOptions, sessionId);

    Query defaultCohortQuery = new Query(CohortDBAdaptor.QueryParams.ID.key(), DEFAULT_COHORT);
    all = catalogManager.getCohortManager().get(studyId, defaultCohortQuery, new QueryOptions(), sessionId).first().getId();
}
/**
 * Creates a cohort with three new samples, deletes it by uid and expects the cohort to be
 * retrievable afterwards with DELETED status.
 */
@Test
public void testDeleteCohort() throws CatalogException, IOException {
    String studyId = "user@1000G:phase1";

    Sample sample1 = catalogManager.getSampleManager().create(studyId, new Sample().setId("SAMPLE_1"), new QueryOptions(),
            sessionIdUser).first();
    Sample sample2 = catalogManager.getSampleManager().create(studyId, new Sample().setId("SAMPLE_2"), new QueryOptions(),
            sessionIdUser).first();
    Sample sample3 = catalogManager.getSampleManager().create(studyId, new Sample().setId("SAMPLE_3"), new QueryOptions(),
            sessionIdUser).first();

    Cohort newCohort = new Cohort()
            .setId("MyCohort")
            .setType(Study.Type.FAMILY)
            .setSamples(Arrays.asList(sample1, sample2, sample3));
    Cohort myCohort = catalogManager.getCohortManager().create(studyId, newCohort, null, sessionIdUser).first();

    assertEquals("MyCohort", myCohort.getId());
    assertEquals(3, myCohort.getSamples().size());
    // Every created sample must be part of the stored cohort.
    for (Sample expected : new Sample[]{sample1, sample2, sample3}) {
        assertTrue(myCohort.getSamples().stream().map(Sample::getUid).collect(Collectors.toList()).contains(expected.getUid()));
    }

    Query byUid = new Query(CohortDBAdaptor.QueryParams.UID.key(), myCohort.getUid());
    WriteResult deleteResult = catalogManager.getCohortManager().delete(studyId, byUid, null, sessionIdUser);
    assertEquals(1, deleteResult.getNumModified());

    // The deleted cohort is only returned when the status filter is given explicitly.
    Query deletedQuery = new Query()
            .append(CohortDBAdaptor.QueryParams.UID.key(), myCohort.getUid())
            .append(CohortDBAdaptor.QueryParams.STATUS_NAME.key(), "!=" + Cohort.CohortStatus.READY);
    Cohort deleted = catalogManager.getCohortManager().get(studyId, deletedQuery, null, sessionIdUser).first();
    assertEquals(Status.DELETED, deleted.getStatus().getName());
}