/**
 * Delegates to {@code multiIndexPlatinum(ObjectMap)} with a freshly created, empty {@link ObjectMap}.
 *
 * @throws Exception propagated from the delegated call
 */
@Test
public void multiIndexPlatinum() throws Exception {
    ObjectMap emptyOptions = new ObjectMap();
    multiIndexPlatinum(emptyOptions);
}
/**
 * Delegates to the parameterised {@code removeFileTest} overload with a freshly created,
 * empty {@link QueryOptions}.
 *
 * @throws Exception propagated from the delegated call
 */
@Test
public void removeFileTest() throws Exception {
    QueryOptions emptyOptions = new QueryOptions();
    removeFileTest(emptyOptions);
}
/**
 * Runs the default ETL over {@code smallInputUri} with the transform format option set to
 * {@code "avro"} and then checks, in order:
 * <ol>
 *   <li>the transformed file name ends with {@code variants.avro.gz};</li>
 *   <li>exactly one file is registered as indexed in the study configuration;</li>
 *   <li>the transformed variants, via {@code checkTransformedVariants};</li>
 *   <li>the loaded variants, via {@code checkLoadedVariants}, against the count returned by
 *       {@code getExpectedNumLoadedVariants}.</li>
 * </ol>
 *
 * @throws Exception if any step of the pipeline or any check fails
 */
@Test
public void avroBasicIndex() throws Exception {
    clearDB(DB_NAME);
    StudyConfiguration studyConfiguration = newStudyConfiguration();

    ObjectMap avroOptions = new ObjectMap(VariantStorageEngine.Options.TRANSFORM_FORMAT.key(), "avro");
    StoragePipelineResult etlResult = runDefaultETL(smallInputUri, variantStorageEngine, studyConfiguration, avroOptions);

    // Only the file name (not the whole path) is compared against the expected suffix.
    URI transformResult = etlResult.getTransformResult();
    String transformedFileName = Paths.get(transformResult).toFile().getName();
    assertTrue("Incorrect transform file extension " + transformResult + ". Expected 'variants.avro.gz'",
            transformedFileName.endsWith("variants.avro.gz"));

    assertEquals(1, studyConfiguration.getIndexedFiles().size());

    VariantFileMetadata fileMetadata = checkTransformedVariants(transformResult, studyConfiguration);
    VariantDBAdaptor dbAdaptor = variantStorageEngine.getDBAdaptor();
    checkLoadedVariants(dbAdaptor, studyConfiguration, true, false, true, getExpectedNumLoadedVariants(fileMetadata));
}
clearDB(DB_NAME); ObjectMap params = new ObjectMap(); StudyConfiguration studyConfiguration = newStudyConfiguration(); params.put(VariantStorageEngine.Options.STUDY.key(), studyConfiguration.getStudyName()); params.put(VariantStorageEngine.Options.COMPRESS_METHOD.key(), "snappy"); StoragePipelineResult etlResult = runETL(variantStorageEngine, params, true, true, true); VariantDBAdaptor dbAdaptor = getVariantStorageEngine().getDBAdaptor(); studyConfiguration = dbAdaptor.getStudyConfigurationManager().getStudyConfiguration(studyConfiguration.getStudyId(), null).first(); VariantFileMetadata fileMetadata = checkTransformedVariants(etlResult.getTransformResult(), studyConfiguration); checkLoadedVariants(variantStorageEngine.getDBAdaptor(), studyConfiguration, false, false, false, getExpectedNumLoadedVariants (fileMetadata));
@Test public void multiRegionIndex() throws Exception { clearDB(DB_NAME); StudyConfiguration studyConfiguration = new StudyConfiguration(1, "multiRegion"); .append(VariantStorageEngine.Options.ANNOTATE.key(), false); runDefaultETL(getResourceUri("1k.chr1.phase3_shapeit2_mvncall_integrated_v5.20130502.genotypes.vcf.gz"), variantStorageEngine, studyConfiguration, options); Integer defaultCohortId = studyConfiguration.getCohortIds().get(StudyEntry.DEFAULT_COHORT); assertEquals(2504, studyConfiguration.getCohorts().get(defaultCohortId).size()); assertTrue(studyConfiguration.getIndexedFiles().contains(fileIdChr1)); checkLoadedVariants(getVariantStorageEngine().getDBAdaptor(), studyConfiguration, true, false, false, -1); runDefaultETL(getResourceUri("10k.chr22.phase3_shapeit2_mvncall_integrated_v5.20130502.genotypes.vcf.gz"), variantStorageEngine, checkLoadedVariants(getVariantStorageEngine().getDBAdaptor(), studyConfiguration, true, false, false, -1); File[] statsFile1 = getTmpRootDir().toFile().listFiles((dir, name1) -> name1.startsWith(VariantStoragePipeline.buildFilename(studyConfiguration.getStudyName(), fileIdChr1)) && name1.contains("variants")); File[] statsFile2 = getTmpRootDir().toFile().listFiles((dir, name1) -> name1.startsWith(VariantStoragePipeline.buildFilename(studyConfiguration.getStudyName(), fileIdChr22)) && name1.contains("variants")); assertEquals(1, statsFile1.length);
@Test public void multiIndex() throws Exception { clearDB(DB_NAME); int expectedNumVariants = NUM_VARIANTS - 37; //37 variants have been removed from this dataset because had the genotype 0|0 for .append(VariantStorageEngine.Options.CALCULATE_STATS.key(), false) .append(VariantStorageEngine.Options.ANNOTATE.key(), false); URI file1Uri = getResourceUri("1000g_batches/1-500.filtered.10k.chr22.phase3_shapeit2_mvncall_integrated_v5.20130502.genotypes.vcf.gz"); runDefaultETL(file1Uri, variantStorageEngine, studyConfigurationMultiFile, options); Integer defaultCohortId = studyConfigurationMultiFile.getCohortIds().get(StudyEntry.DEFAULT_COHORT); assertTrue(studyConfigurationMultiFile.getCohorts().containsKey(defaultCohortId)); URI file2Uri = getResourceUri("1000g_batches/501-1000.filtered.10k.chr22.phase3_shapeit2_mvncall_integrated_v5.20130502.genotypes.vcf.gz"); runDefaultETL(file2Uri, variantStorageEngine, studyConfigurationMultiFile, options); assertEquals(1000, studyConfigurationMultiFile.getCohorts().get(defaultCohortId).size()); assertEquals(Collections.singleton(defaultCohortId), studyConfigurationMultiFile.getCalculatedStats()); URI file3Uri = getResourceUri("1000g_batches/1001-1500.filtered.10k.chr22.phase3_shapeit2_mvncall_integrated_v5.20130502.genotypes.vcf.gz"); runDefaultETL(file3Uri, variantStorageEngine, studyConfigurationMultiFile, options); assertEquals(1500, studyConfigurationMultiFile.getCohorts().get(defaultCohortId).size()); assertEquals(Collections.emptySet(), studyConfigurationMultiFile.getCalculatedStats()); URI file4Uri = getResourceUri("1000g_batches/1501-2000.filtered.10k.chr22.phase3_shapeit2_mvncall_integrated_v5.20130502.genotypes.vcf.gz"); runDefaultETL(file4Uri, variantStorageEngine, studyConfigurationMultiFile, options); assertEquals(2000, studyConfigurationMultiFile.getCohorts().get(defaultCohortId).size()); int fileId4 = studyConfigurationMultiFile.getFileIds().get(UriUtils.fileName(file4Uri)); 
assertTrue(studyConfigurationMultiFile.getIndexedFiles().contains(fileId4)); URI file5Uri = getResourceUri("1000g_batches/2001-2504.filtered.10k.chr22.phase3_shapeit2_mvncall_integrated_v5.20130502.genotypes.vcf.gz"); runDefaultETL(file5Uri, variantStorageEngine, studyConfigurationMultiFile, options);
public void indexWithOtherFields(String extraFields) throws Exception { StudyConfiguration studyConfiguration = newStudyConfiguration(); StoragePipelineResult etlResult = runDefaultETL(smallInputUri, getVariantStorageEngine(), studyConfiguration, new ObjectMap(VariantStorageEngine.Options.EXTRA_GENOTYPE_FIELDS.key(), extraFields) .append(VariantStorageEngine.Options.TRANSFORM_FORMAT.key(), "avro") checkTransformedVariants(etlResult.getTransformResult(), studyConfiguration, fileMetadata.getStats().getNumVariants()); VariantDBAdaptor dbAdaptor = variantStorageEngine.getDBAdaptor(); checkLoadedVariants(dbAdaptor, studyConfiguration, true, false, false, getExpectedNumLoadedVariants(fileMetadata));
@Test @Ignore public void insertVariantIntoSolr() throws Exception { clearDB(DB_NAME); ObjectMap params = new ObjectMap(); StudyConfiguration studyConfiguration = newStudyConfiguration(); params.put(VariantStorageEngine.Options.STUDY.key(), studyConfiguration.getStudyName()); params.put(VariantStorageEngine.Options.TRANSFORM_FORMAT.key(), "json"); params.put(VariantStorageEngine.Options.COMPRESS_METHOD.key(), "gZiP"); params.put(VariantStorageEngine.Options.TRANSFORM_THREADS.key(), 1); params.put(VariantStorageEngine.Options.LOAD_THREADS.key(), 1); params.put(VariantStorageEngine.Options.ANNOTATE.key(), true); runETL(variantStorageEngine, params, true, true, true); VariantDBAdaptor dbAdaptor = getVariantStorageEngine().getDBAdaptor(); VariantSearchManager variantSearchManager = new VariantSearchManager(null, variantStorageEngine.getConfiguration()); // FIXME Collection is not in the configuration any more // variantSearchManager.load(variantStorageEngine.getConfiguration().getSearch().getCollection(), dbAdaptor.iterator()); }
/**
 * Loads the chr1 file into the {@code "multiRegion"} study, registers the chr22 file name
 * under file id 6 in the study configuration, and then expects loading the chr22 file with
 * {@code LOAD_SPLIT_DATA} set to {@code false} to fail.
 * <p>
 * The expected failure is described by the class and message of the exception returned by
 * {@code StorageEngineException.alreadyLoadedSamples(studyConfiguration, 6)}.
 *
 * @throws Exception the expected storage exception, or any unexpected failure
 */
@Test
public void multiRegionIndexFail() throws Exception {
    final String chr1FileName = "1k.chr1.phase3_shapeit2_mvncall_integrated_v5.20130502.genotypes.vcf.gz";
    final String chr22FileName = "10k.chr22.phase3_shapeit2_mvncall_integrated_v5.20130502.genotypes.vcf.gz";
    final int chr22FileId = 6;

    clearDB(DB_NAME);
    StudyConfiguration studyConfiguration = new StudyConfiguration(1, "multiRegion");

    ObjectMap options = new ObjectMap();
    options.put(VariantStorageEngine.Options.STUDY_TYPE.key(), SampleSetType.CONTROL_SET);
    options.put(VariantStorageEngine.Options.CALCULATE_STATS.key(), true);
    options.put(VariantStorageEngine.Options.ANNOTATE.key(), false);

    // First load: expected to succeed.
    runDefaultETL(getResourceUri(chr1FileName), variantStorageEngine, studyConfiguration, options);

    // Pre-register the second file so its id is known when building the expected exception.
    studyConfiguration.getFileIds().put(chr22FileName, chr22FileId);
    StorageEngineException exception = StorageEngineException.alreadyLoadedSamples(studyConfiguration, chr22FileId);
    thrown.expect(exception.getClass());
    thrown.expectMessage(exception.getMessage());

    // Second load: must throw the exception configured above.
    runDefaultETL(getResourceUri(chr22FileName), variantStorageEngine, studyConfiguration,
            options.append(VariantStorageEngine.Options.LOAD_SPLIT_DATA.key(), false));
}
/**
 * Runs the default ETL over {@code smallInputUri} with the extra genotype fields option set to
 * {@code VariantQueryUtils.NONE} (avro transform format, annotation and stats calculation
 * disabled), then iterates every variant from the DB adaptor and asserts that the format of
 * its {@code STUDY_NAME} study is exactly {@code "GT"}.
 *
 * @throws Exception if the ETL run fails or an assertion does not hold
 */
@Test
public void indexWithoutOtherFields() throws Exception {
    StudyConfiguration studyConfiguration = newStudyConfiguration();

    ObjectMap options = new ObjectMap(VariantStorageEngine.Options.EXTRA_GENOTYPE_FIELDS.key(), VariantQueryUtils.NONE)
            .append(VariantStorageEngine.Options.TRANSFORM_FORMAT.key(), "avro")
            .append(VariantStorageEngine.Options.ANNOTATE.key(), false)
            .append(VariantStorageEngine.Options.CALCULATE_STATS.key(), false);
    runDefaultETL(smallInputUri, getVariantStorageEngine(), studyConfiguration, options);

    VariantDBAdaptor dbAdaptor = variantStorageEngine.getDBAdaptor();
    for (Variant loadedVariant : dbAdaptor) {
        assertEquals("GT", loadedVariant.getStudy(STUDY_NAME).getFormatAsString());
    }
}
@Test public void multiRegionBatchIndex() throws Exception { clearDB(DB_NAME); StudyConfiguration studyConfiguration = new StudyConfiguration(1, "multiRegion"); .append(VariantStorageEngine.Options.ANNOTATE.key(), false); VariantStorageEngine variantStorageEngine = getVariantStorageEngine(); URI chr1 = getResourceUri("1k.chr1.phase3_shapeit2_mvncall_integrated_v5.20130502.genotypes.vcf.gz"); URI chr22 = getResourceUri("10k.chr22.phase3_shapeit2_mvncall_integrated_v5.20130502.genotypes.vcf.gz");
.append(VariantStorageEngine.Options.ANNOTATE.key(), false); runDefaultETL(getResourceUri("1000g_batches/1-500.filtered.10k.chr22.phase3_shapeit2_mvncall_integrated_v5.20130502.genotypes.vcf.gz"), variantStorageEngine, studyConfiguration1, options); runDefaultETL(getResourceUri("1000g_batches/501-1000.filtered.10k.chr22.phase3_shapeit2_mvncall_integrated_v5.20130502.genotypes.vcf.gz"), variantStorageEngine, studyConfiguration1, options); runDefaultETL(getResourceUri("1000g_batches/1001-1500.filtered.10k.chr22.phase3_shapeit2_mvncall_integrated_v5.20130502.genotypes.vcf.gz"), variantStorageEngine, studyConfiguration2, options); runDefaultETL(getResourceUri("1000g_batches/1501-2000.filtered.10k.chr22.phase3_shapeit2_mvncall_integrated_v5.20130502.genotypes.vcf.gz"), variantStorageEngine, studyConfiguration2, options); runDefaultETL(getResourceUri("1000g_batches/2001-2504.filtered.10k.chr22.phase3_shapeit2_mvncall_integrated_v5.20130502.genotypes.vcf.gz"), variantStorageEngine, studyConfiguration2, options);
/**
 * Convenience overload: forwards to the five-argument {@code checkLoadedVariants} using
 * {@code NUM_VARIANTS} as the expected number of variants.
 *
 * @param dbAdaptor          adaptor passed through unchanged
 * @param studyConfiguration study configuration passed through unchanged
 * @param includeSamples     flag passed through unchanged
 * @param includeSrc         flag passed through unchanged
 */
private void checkLoadedVariants(VariantDBAdaptor dbAdaptor, StudyConfiguration studyConfiguration,
                                 boolean includeSamples, boolean includeSrc) {
    // The original inline note next to NUM_VARIANTS read "9792".
    final int defaultExpectedNumVariants = NUM_VARIANTS;
    checkLoadedVariants(dbAdaptor, studyConfiguration, includeSamples, includeSrc, defaultExpectedNumVariants);
}
/**
 * Convenience overload: forwards to the three-argument {@code checkTransformedVariants},
 * passing {@code -1} as the third argument.
 *
 * @param variantsJson       URI of the transformed variants file, passed through unchanged
 * @param studyConfiguration study configuration passed through unchanged
 * @return whatever the three-argument overload returns
 * @throws StorageEngineException propagated from the three-argument overload
 */
private VariantFileMetadata checkTransformedVariants(URI variantsJson, StudyConfiguration studyConfiguration)
        throws StorageEngineException {
    // NOTE(review): -1 presumably means "no specific variant count expected" — confirm against the overload.
    VariantFileMetadata fileMetadata = checkTransformedVariants(variantsJson, studyConfiguration, -1);
    return fileMetadata;
}
clearDB(DB_NAME); ObjectMap params = new ObjectMap(); StudyConfiguration studyConfiguration = newStudyConfiguration(); params.put(VariantStorageEngine.Options.STUDY.key(), studyConfiguration.getStudyName()); params.put(VariantStorageEngine.Options.TRANSFORM_FORMAT.key(), "json"); StoragePipelineResult etlResult = runETL(variantStorageEngine, params, true, true, true); VariantDBAdaptor dbAdaptor = getVariantStorageEngine().getDBAdaptor(); studyConfiguration = dbAdaptor.getStudyConfigurationManager().getStudyConfiguration(studyConfiguration.getStudyId(), null).first(); VariantFileMetadata fileMetadata = checkTransformedVariants(etlResult.getTransformResult(), studyConfiguration); checkLoadedVariants(variantStorageEngine.getDBAdaptor(), studyConfiguration, true, false, getExpectedNumLoadedVariants(fileMetadata));
public void multiIndexPlatinum(ObjectMap options) throws Exception { clearDB(DB_NAME); options.putIfAbsent(VariantStorageEngine.Options.ANNOTATE.key(), false); VariantStorageEngine variantStorageManager = getVariantStorageEngine(); VariantDBAdaptor dbAdaptor = variantStorageManager.getDBAdaptor(); StudyConfigurationManager studyConfigurationManager = dbAdaptor.getStudyConfigurationManager(); ObjectMap fileOptions = new ObjectMap(); fileOptions.putAll(options); runDefaultETL(getResourceUri("platinum/1K.end.platinum-genomes-vcf-NA128" + fileId + "_S1.genome.vcf.gz"), variantStorageManager, studyConfigurationMultiFile, fileOptions); studyConfigurationMultiFile = studyConfigurationManager.getStudyConfiguration(studyConfigurationMultiFile.getStudyId(), null).first(); uris.add(getResourceUri("platinum/1K.end.platinum-genomes-vcf-NA128" + fileId + "_S1.genome.vcf.gz")); variantStorageManager = getVariantStorageEngine(); variantStorageManager.getConfiguration().getStorageEngine(variantStorageManager.getStorageEngineId()).getVariant().getOptions() .append(VariantStorageEngine.Options.STUDY.key(), studyConfigurationBatchFile.getStudyName()) checkLoadedVariants(dbAdaptor, studyConfigurationBatchFile, true, false, -1);
/**
 * Convenience overload: forwards to the six-argument {@code checkLoadedVariants}, inserting
 * {@code false} as the fifth argument and keeping every other argument as received.
 *
 * @param dbAdaptor           adaptor passed through unchanged
 * @param studyConfiguration  study configuration passed through unchanged
 * @param includeSamples      flag passed through unchanged
 * @param includeSrc          flag passed through unchanged
 * @param expectedNumVariants expected number of variants, passed through unchanged
 */
private void checkLoadedVariants(VariantDBAdaptor dbAdaptor, StudyConfiguration studyConfiguration,
                                 boolean includeSamples, boolean includeSrc, int expectedNumVariants) {
    // Value of the extra boolean that only the six-argument overload exposes.
    final boolean fifthArgument = false;
    checkLoadedVariants(dbAdaptor, studyConfiguration, includeSamples, includeSrc, fifthArgument, expectedNumVariants);
}
/**
 * Runs the default ETL over {@code smallInputUri} with the transform format option set to
 * {@code "json"} and then checks, in order:
 * <ol>
 *   <li>the transformed file name ends with {@code variants.json.gz};</li>
 *   <li>exactly one file is registered as indexed in the study configuration;</li>
 *   <li>the transformed variants, via {@code checkTransformedVariants};</li>
 *   <li>the loaded variants, via {@code checkLoadedVariants}, against the count returned by
 *       {@code getExpectedNumLoadedVariants} for the file metadata read back from the
 *       transform result.</li>
 * </ol>
 *
 * @throws Exception if any step of the pipeline or any check fails
 */
@Test
public void basicIndex() throws Exception {
    clearDB(DB_NAME);
    StudyConfiguration studyConfiguration = newStudyConfiguration();

    ObjectMap jsonOptions = new ObjectMap(VariantStorageEngine.Options.TRANSFORM_FORMAT.key(), "json");
    StoragePipelineResult etlResult = runDefaultETL(smallInputUri, variantStorageEngine, studyConfiguration, jsonOptions);

    // Only the file name (not the whole path) is compared against the expected suffix.
    URI transformResult = etlResult.getTransformResult();
    String transformedFileName = Paths.get(transformResult).toFile().getName();
    assertTrue("Incorrect transform file extension " + transformResult + ". Expected 'variants.json.gz'",
            transformedFileName.endsWith("variants.json.gz"));

    VariantFileMetadata fileMetadata = variantStorageEngine.getVariantReaderUtils().readVariantFileMetadata(transformResult);
    assertEquals(1, studyConfiguration.getIndexedFiles().size());

    checkTransformedVariants(transformResult, studyConfiguration);
    VariantDBAdaptor dbAdaptor = variantStorageEngine.getDBAdaptor();
    checkLoadedVariants(dbAdaptor, studyConfiguration, true, false, true, getExpectedNumLoadedVariants(fileMetadata));
}
/**
 * Runs the inherited {@code multiIndexPlatinum} scenario with the merge mode option set to
 * {@code VariantStorageEngine.MergeMode.BASIC}, then calls {@code checkPlatinumDatabase} with
 * a function that returns the size of the list stored under {@code FILES_FIELD} and a
 * singleton set containing {@code GenotypeClass.UNKNOWN_GENOTYPE}.
 *
 * @throws Exception if indexing or the database check fails
 */
@Test
public void multiIndexPlatinumMergeSimple() throws Exception {
    ObjectMap basicMergeOptions =
            new ObjectMap(VariantStorageEngine.Options.MERGE_MODE.key(), VariantStorageEngine.MergeMode.BASIC);
    super.multiIndexPlatinum(basicMergeOptions);

    // Only the list size is needed, so an unbounded wildcard cast avoids the raw List type.
    checkPlatinumDatabase(d -> ((List<?>) d.get(FILES_FIELD)).size(),
            Collections.singleton(GenotypeClass.UNKNOWN_GENOTYPE));
}
variantStorageEngineExpected, studyConfiguration2, options); super.removeFileTest(params); VariantMongoDBAdaptor dbAdaptor = getVariantStorageEngine().getDBAdaptor();