/**
 * Single-argument convenience overload of {@code getResourceUri(String, String)}.
 * The given name is passed unchanged as both arguments of the two-argument overload.
 *
 * @param resourceName resource name, used for both arguments of the delegate call
 * @return the URI returned by the two-argument overload
 * @throws IOException if the two-argument overload throws it
 */
public static URI getResourceUri(String resourceName) throws IOException {
    final String secondArgument = resourceName;
    return getResourceUri(resourceName, secondArgument);
}
/**
 * Resolves one of the platinum genome VCF test resources under {@code platinum/}.
 * <p>
 * The file may be selected either by a zero-based index ({@code 0..16}) or directly by its
 * sample number ({@code 12877..12893}); index {@code i} maps to sample {@code 12877 + i}.
 * Any other value, including negative indexes, is rejected.
 *
 * @param fileId zero-based index in {@code [0, 16]} or sample number in {@code [12877, 12893]}
 * @return the URI returned by {@code getResourceUri} for the selected file
 * @throws IllegalArgumentException if {@code fileId} is outside both accepted ranges
 * @throws IOException              if {@code getResourceUri} throws it
 */
public static URI getPlatinumFile(int fileId) throws IOException {
    final int firstSample = 12877;
    final int lastSample = 12893;

    final int sampleNumber;
    if (fileId >= 0 && fileId <= lastSample - firstSample) {
        // Zero-based index. The lower bound matters: without it a negative id would
        // silently build a name for a sample that does not exist (e.g. -1 -> NA12876).
        sampleNumber = firstSample + fileId;
    } else if (fileId >= firstSample && fileId <= lastSample) {
        sampleNumber = fileId;
    } else {
        throw new IllegalArgumentException("Unknown platinum file " + fileId);
    }

    String fileName = "1K.end.platinum-genomes-vcf-NA" + sampleNumber + "_S1.genome.vcf.gz";
    return getResourceUri("platinum/" + fileName);
}
/**
 * Resolves {@code resourceName} through {@code getResourceUri} and delegates to
 * {@code create(studyId, uri)} using this class's {@code studyId}.
 *
 * @param resourceName name of the resource to resolve
 * @return the file returned by the two-argument {@code create}
 * @throws IOException      if resolving the resource fails
 * @throws CatalogException if the delegate {@code create} throws it
 */
protected File create(String resourceName) throws IOException, CatalogException {
    return create(
            studyId,
            getResourceUri(resourceName));
}
@Override protected void before() throws Throwable { super.before(); Path rootDir = getTmpRootDir(); String confFolder = VariantSearchManager.CONF_SET; // Copy configuration getResourceUri("configsets/variantsCollection/solrconfig.xml", "configsets/" + confFolder + "/solrconfig.xml"); getResourceUri("solr/variant/managed-schema", "configsets/" + confFolder + "/managed-schema"); getResourceUri("configsets/variantsCollection/params.json", "configsets/" + confFolder + "/params.json"); getResourceUri("configsets/variantsCollection/protwords.txt", "configsets/" + confFolder + "/protwords.txt"); getResourceUri("configsets/variantsCollection/stopwords.txt", "configsets/" + confFolder + "/stopwords.txt"); getResourceUri("configsets/variantsCollection/synonyms.txt", "configsets/" + confFolder + "/synonyms.txt"); getResourceUri("configsets/variantsCollection/lang/stopwords_en.txt", "configsets/" + confFolder + "/lang/stopwords_en.txt"); String solrHome = rootDir.resolve("solr").toString(); if (embeded) { solrClient = create(solrHome, rootDir.resolve("configsets").toString(), coreName); } else { String host = "http://localhost:8983/solr"; String configSet = VariantSearchManager.CONF_SET; int timeout = 5000; SolrManager solrManager = new SolrManager(host, "core", timeout); if (!solrManager.existsCore(coreName)) { solrManager.createCore(coreName, configSet); } this.solrClient = solrManager.getSolrClient(); } }
/**
 * Creates the two input files used by the tests. Both come from the same NA12877 platinum
 * resource, requested under {@code platinum_1/} and {@code platinum_2/} respectively, and are
 * created with {@code data/platinum_1/} and {@code data/platinum_2/}. The UID of each created
 * file is printed to standard output.
 *
 * @throws Exception if resolving the resource or creating a file fails
 */
@Before
public void before() throws Exception {
    String sourceVcf = "platinum/1K.end.platinum-genomes-vcf-NA12877_S1.genome.vcf.gz";

    inputFile1 = create(
            studyId,
            getResourceUri(sourceVcf, "platinum_1/1K.end.platinum-genomes-vcf-NA12877_S1.genome.vcf.gz"),
            "data/platinum_1/");
    inputFile2 = create(
            studyId,
            getResourceUri(sourceVcf, "platinum_2/1K.end.platinum-genomes-vcf-NA12877_S1.genome.vcf.gz"),
            "data/platinum_2/");

    System.out.println("inputFile1 = " + inputFile1.getUid());
    System.out.println("inputFile2 = " + inputFile2.getUid());
}
/**
 * After the regular {@code before()} setup, requests stats for cohort {@code ALL} while
 * supplying an aggregation mapping file, and expects the failure message of
 * {@code VariantStatsStorageOperation.nonAggregatedWithMappingFile()}.
 */
@Test
public void testCalculateNonAggregatedWithAggregationMappingFile() throws Exception {
    before();

    String tagMap = getResourceUri("exac-tag-mapping.properties").getPath();
    QueryOptions options = new QueryOptions(
            VariantStorageEngine.Options.AGGREGATION_MAPPING_PROPERTIES.key(),
            tagMap);

    // The expectation must be registered before the call that is supposed to fail.
    thrown.expectMessage(
            VariantStatsStorageOperation.nonAggregatedWithMappingFile().getMessage());

    calculateStats(options, Arrays.asList("ALL"));
}
@Test public void testCalculateAggregatedExacStats() throws Exception { beforeAggregated("exachead.vcf.gz", Aggregation.EXAC); String tagMap = getResourceUri("exac-tag-mapping.properties").getPath(); List<String> cohortIds = createCohorts(sessionId, studyId, tagMap, catalogManager, logger) .stream().map(Cohort::getId).map(Object::toString).collect(Collectors.toList()); QueryOptions options = new QueryOptions(VariantStorageEngine.Options.AGGREGATION_MAPPING_PROPERTIES.key(), tagMap); calculateStats(options, cohortIds); List<Cohort> cohorts = catalogManager.getCohortManager().get(studyId, (Query) null, null, sessionId).getResult(); Set<String> cohortNames = cohorts .stream() .map(Cohort::getId) .collect(Collectors.toSet()); assertEquals(8, cohortNames.size()); for (Cohort cohort : cohorts) { assertEquals(Cohort.CohortStatus.READY, cohort.getStatus().getName()); } // checkCalculatedAggregatedStats(cohorts, dbName); }
/**
 * Builds the fixtures shared by the tests:
 * <ul>
 *   <li>a {@code StudyConfiguration} (id 1, name "study") with samples s1..s7 mapped to 1..7,
 *       two indexed files (10 and 11) with their samples, and a calculated cohort "ALL" (20);</li>
 *   <li>the variant file header read from the NA12877 platinum resource;</li>
 *   <li>a {@code VariantMetadataConverter}, a pretty-printing {@code ObjectWriter} and a
 *       {@code ProjectMetadata("hsapiens", "37", 1)}.</li>
 * </ul>
 *
 * @throws Exception if the resource cannot be resolved or its metadata cannot be read
 */
@Before
public void setUp() throws Exception {
    studyConfiguration = new StudyConfiguration(1, "study");

    // Samples s1..s7 -> ids 1..7
    for (int sampleId = 1; sampleId <= 7; sampleId++) {
        studyConfiguration.getSampleIds().put("s" + sampleId, sampleId);
    }

    int firstFileId = 10;
    studyConfiguration.getIndexedFiles().add(firstFileId);
    studyConfiguration.getFileIds().put("file1.vcf", firstFileId);
    studyConfiguration.getSamplesInFiles().put(firstFileId, new LinkedHashSet<>(Arrays.asList(1, 2, 3, 4)));

    int secondFileId = 11;
    studyConfiguration.getIndexedFiles().add(secondFileId);
    studyConfiguration.getFileIds().put("file2.vcf", secondFileId);
    studyConfiguration.getSamplesInFiles().put(secondFileId, new LinkedHashSet<>(Arrays.asList(4, 5, 6)));

    int allCohortId = 20;
    studyConfiguration.getCalculatedStats().add(allCohortId);
    studyConfiguration.getCohortIds().put("ALL", allCohortId);
    studyConfiguration.getCohorts().put(allCohortId, new HashSet<>(Arrays.asList(1, 2, 3, 4, 5, 6)));

    URI uri = VariantStorageBaseTest.getResourceUri("platinum/1K.end.platinum-genomes-vcf-NA12877_S1.genome.vcf.gz");
    VariantFileMetadata fileMetadata = VariantReaderUtils.readVariantFileMetadata(Paths.get(uri), null);
    studyConfiguration.addVariantFileHeader(fileMetadata.getHeader(), null);

    variantMetadataConverter = new VariantMetadataConverter();

    ObjectMapper mapper = new ObjectMapper();
    mapper.configure(MapperFeature.REQUIRE_SETTERS_FOR_GETTERS, true);
    mapper.setSerializationInclusion(JsonInclude.Include.NON_EMPTY);
    objectWriter = mapper.writerWithDefaultPrettyPrinter();

    projectMetadata = new ProjectMetadata("hsapiens", "37", 1);
}
@Test public void testCalculateAggregatedExacStatsExplicitCohorts() throws Exception { beforeAggregated("exachead.vcf.gz", Aggregation.EXAC); String tagMap = getResourceUri("exac-tag-mapping.properties").getPath(); QueryOptions options = new QueryOptions(VariantStorageEngine.Options.AGGREGATION_MAPPING_PROPERTIES.key(), tagMap); calculateStats(options, Arrays.asList("AFR", "ALL", "AMR", "EAS", "FIN", "NFE", "OTH", "SAS")); List<Cohort> cohorts = catalogManager.getCohortManager().get(studyId, (Query) null, null, sessionId).getResult(); Set<String> cohortNames = cohorts .stream() .map(Cohort::getId) .collect(Collectors.toSet()); assertEquals(8, cohortNames.size()); for (Cohort cohort : cohorts) { assertEquals(Cohort.CohortStatus.READY, cohort.getStatus().getName()); } // checkCalculatedAggregatedStats(cohorts, dbName); }
/**
 * Requests stats on the {@code exachead.vcf.gz} aggregated input for only the cohorts
 * {@code AFR} and {@code ALL} while supplying the EXAC tag-mapping file, and expects the
 * failure message of {@code VariantStatsStorageOperation.differentCohortsThanMappingFile()}.
 */
@Test
public void testCalculateAggregatedExacStatsWrongExplicitCohorts() throws Exception {
    beforeAggregated("exachead.vcf.gz", Aggregation.EXAC);

    String tagMap = getResourceUri("exac-tag-mapping.properties").getPath();
    QueryOptions options = new QueryOptions(
            VariantStorageEngine.Options.AGGREGATION_MAPPING_PROPERTIES.key(),
            tagMap);

    // The expectation must be registered before the call that is supposed to fail.
    thrown.expectMessage(
            VariantStatsStorageOperation.differentCohortsThanMappingFile().getMessage());

    calculateStats(options, Arrays.asList("AFR", "ALL"));
}
@Test public void testCalculateAggregatedExacStatsWithoutCohorts() throws Exception { beforeAggregated("exachead.vcf.gz", Aggregation.EXAC); String tagMap = getResourceUri("exac-tag-mapping.properties").getPath(); QueryOptions options = new QueryOptions(VariantStorageEngine.Options.AGGREGATION_MAPPING_PROPERTIES.key(), tagMap); calculateStats(options); List<Cohort> cohorts = catalogManager.getCohortManager().get(studyId, (Query) null, null, sessionId).getResult(); Set<String> cohortNames = cohorts .stream() .map(Cohort::getId) .collect(Collectors.toSet()); assertEquals(8, cohortNames.size()); for (Cohort cohort : cohorts) { assertEquals(Cohort.CohortStatus.READY, cohort.getStatus().getName()); } // checkCalculatedAggregatedStats(cohorts, dbName); }
/**
 * Creates a file entry from a test resource under {@code data/vcfs/}, uploads its content and,
 * optionally, marks it as indexed.
 * <p>
 * When {@code indexed} is {@code true} the file gets a {@code READY} index status, its UID is
 * recorded in {@code indexedFiles}, and its samples are appended to the samples of the cohort
 * identified by {@code cohortId}.
 *
 * @param resourceName name of the resource to resolve through {@code getResourceUri}
 * @param indexed      whether to flag the file as indexed and add its samples to the cohort
 * @return the file as re-read from the file manager after all updates
 * @throws IOException         if resolving or uploading the resource fails
 * @throws CatalogException    if any catalog operation fails
 * @throws ArithmeticException if the file UID does not fit in an {@code int}
 */
public static File create(String resourceName, boolean indexed) throws IOException, CatalogException {
    URI uri = getResourceUri(resourceName);
    File file = fileMetadataReader.create(studyId, uri, "data/vcfs/", "", true, null, sessionId).first();
    catalogFileUtils.upload(uri, file, null, sessionId, false, false, true, false, Long.MAX_VALUE);

    if (indexed) {
        FileIndex fileIndex = new FileIndex("user", "today",
                new FileIndex.IndexStatus(FileIndex.IndexStatus.READY), 1234, Collections.emptyMap());
        catalogManager.getFileManager().setFileIndex(studyId, file.getPath(), fileIndex, sessionId);

        // Fail loudly instead of silently truncating a UID that does not fit in an int.
        indexedFiles.add(Math.toIntExact(file.getUid()));

        // Collectors.toList() gives no mutability guarantee, so collect into an explicit
        // ArrayList before appending the samples of the new file.
        List<String> samples = catalogManager.getCohortManager()
                .getSamples(studyId, cohortId, null, sessionId)
                .getResult()
                .stream()
                .map(Sample::getId)
                .collect(Collectors.toCollection(java.util.ArrayList::new));
        samples.addAll(file.getSamples().stream().map(Sample::getId).collect(Collectors.toList()));

        catalogManager.getCohortManager().update(studyId, cohortId,
                new ObjectMap(CohortDBAdaptor.QueryParams.SAMPLES.key(), samples), true, null, sessionId);
    }

    // Re-read the file so the caller sees the state after the updates above.
    return catalogManager.getFileManager().get(studyId, file.getId(), null, sessionId).first();
}
// Set the threshold of `stderr` to the level named "debug", then resolve the
// variant-test-file.vcf.gz test resource and keep its path in `inputFile`.
stderr.setThreshold(Level.toLevel("debug")); inputFile = VariantStorageBaseTest.getResourceUri("variant-test-file.vcf.gz").getPath();
/**
 * Creates a third input file from the NA12877 platinum resource (requested under
 * {@code platinum_3/}) and runs {@code transformFile} on all three inputs, each with fresh
 * {@code QueryOptions} and its own {@code data/transform_N/} argument. The test makes no
 * assertions on the results; it only requires the calls to complete.
 */
@Test
public void testTransformThreeFiles() throws Exception {
    String sourceVcf = "platinum/1K.end.platinum-genomes-vcf-NA12877_S1.genome.vcf.gz";
    File inputFile3 = create(
            studyId,
            getResourceUri(sourceVcf, "platinum_3/1K.end.platinum-genomes-vcf-NA12877_S1.genome.vcf.gz"),
            "data/platinum_3/");

    File firstTransformed = transformFile(inputFile1, new QueryOptions(), "data/transform_1/");
    File secondTransformed = transformFile(inputFile2, new QueryOptions(), "data/transform_2/");
    File thirdTransformed = transformFile(inputFile3, new QueryOptions(), "data/transform_3/");
}
/**
 * Creates a file entry from a test resource under {@code data/vcfs/} in the given study,
 * uploads its content, and returns the file as re-read from the file manager.
 *
 * @param studyId      study in which the file is created
 * @param resourceName name of the resource to resolve through {@code getResourceUri}
 * @param sessionId    session used for every catalog call
 * @return the file fetched from the file manager after the upload
 * @throws IOException      if resolving or uploading the resource fails
 * @throws CatalogException if any catalog operation fails
 */
public File createFile(String studyId, String resourceName, String sessionId) throws IOException, CatalogException {
    URI resourceUri = getResourceUri(resourceName);
    CatalogManager catalogManager = getCatalogManager();

    FileMetadataReader metadataReader = new FileMetadataReader(catalogManager);
    File created = metadataReader.create(studyId, resourceUri, "data/vcfs/", "", true, null, sessionId).first();

    FileUtils uploader = new FileUtils(catalogManager);
    uploader.upload(resourceUri, created, null, sessionId, false, false, true, false, Long.MAX_VALUE);

    return catalogManager.getFileManager().get(studyId, created.getId(), null, sessionId).first();
}
@BeforeClass public static void beforeClass() throws Exception { variantStorageEngine = externalResource.getVariantStorageEngine(); // URI inputUri = VariantStorageBaseTest.getResourceUri("sample1.genome.vcf"); URI inputUri = VariantStorageBaseTest.getResourceUri("platinum/1K.end.platinum-genomes-vcf-NA12877_S1.genome.vcf.gz"); VariantStorageBaseTest.runDefaultETL(inputUri, variantStorageEngine, new StudyConfiguration(0, study1), new ObjectMap(VariantStorageEngine.Options.ANNOTATE.key(), true) .append(VariantStorageEngine.Options.CALCULATE_STATS.key(), false) ); inputUri = VariantStorageBaseTest.getResourceUri("platinum/1K.end.platinum-genomes-vcf-NA12878_S1.genome.vcf.gz"); VariantStorageBaseTest.runDefaultETL(inputUri, variantStorageEngine, new StudyConfiguration(0, study1), new ObjectMap(VariantStorageEngine.Options.ANNOTATE.key(), true) .append(VariantStorageEngine.Options.CALCULATE_STATS.key(), false) ); inputUri = VariantStorageBaseTest.getResourceUri("platinum/1K.end.platinum-genomes-vcf-NA12878_S1.genome.vcf.gz"); VariantStorageBaseTest.runDefaultETL(inputUri, variantStorageEngine, new StudyConfiguration(0, study2), new ObjectMap(VariantStorageEngine.Options.ANNOTATE.key(), true) .append(VariantStorageEngine.Options.CALCULATE_STATS.key(), false) ); VariantHbaseTestUtils.printVariants(variantStorageEngine.getDBAdaptor(), newOutputUri()); }
@BeforeClass public static void beforeClass() throws Exception { HadoopVariantStorageEngine variantStorageManager = externalResource.getVariantStorageEngine(); externalResource.clearDB(variantStorageManager.getVariantTableName()); externalResource.clearDB(variantStorageManager.getArchiveTableName(STUDY_ID)); // URI inputUri = VariantStorageBaseTest.getResourceUri("sample1.genome.vcf"); URI inputUri = VariantStorageBaseTest.getResourceUri("platinum/1K.end.platinum-genomes-vcf-NA12877_S1.genome.vcf.gz"); // URI inputUri = VariantStorageManagerTestUtils.getResourceUri("variant-test-file.vcf.gz"); studyConfiguration = VariantStorageBaseTest.newStudyConfiguration(); etlResult = VariantStorageBaseTest.runDefaultETL(inputUri, variantStorageManager, studyConfiguration, new ObjectMap(Options.TRANSFORM_FORMAT.key(), "avro") .append(Options.ANNOTATE.key(), true) .append(Options.CALCULATE_STATS.key(), false) ); fileMetadata = variantStorageManager.readVariantFileMetadata(etlResult.getTransformResult()); VariantSetStats stats = fileMetadata.getStats(); Assert.assertNotNull(stats); try (VariantHadoopDBAdaptor dbAdaptor = variantStorageManager.getDBAdaptor()) { VariantHbaseTestUtils.printVariantsFromVariantsTable(dbAdaptor); VariantHbaseTestUtils.printVariantsFromArchiveTable(dbAdaptor, studyConfiguration); } }
public static VariantFileMetadata loadFile( HadoopVariantStorageEngine variantStorageManager, String dbName, URI outputUri, String resourceName, StudyConfiguration studyConfiguration, Map<? extends String, ?> otherParams, boolean doTransform, boolean loadArchive, boolean loadVariant) throws Exception { URI fileInputUri = VariantStorageBaseTest.getResourceUri(resourceName); ObjectMap params = new ObjectMap(VariantStorageEngine.Options.TRANSFORM_FORMAT.key(), "proto") .append(VariantStorageEngine.Options.STUDY.key(), studyConfiguration.getStudyName()) .append(VariantStorageEngine.Options.DB_NAME.key(), dbName).append(VariantStorageEngine.Options.ANNOTATE.key(), false) .append(VariantAnnotationManager.SPECIES, "hsapiens").append(VariantAnnotationManager.ASSEMBLY, "GRch37") .append(VariantStorageEngine.Options.CALCULATE_STATS.key(), false) .append(HadoopVariantStorageEngine.HADOOP_LOAD_DIRECT, true) .append(HadoopVariantStorageEngine.HADOOP_LOAD_ARCHIVE, loadArchive) .append(HadoopVariantStorageEngine.HADOOP_LOAD_VARIANT, loadVariant); if (otherParams != null) { params.putAll(otherParams); } // if (fileId > 0) { // params.append(VariantStorageEngine.Options.FILE_ID.key(), fileId); // } StoragePipelineResult etlResult = VariantStorageBaseTest.runETL(variantStorageManager, fileInputUri, outputUri, params, doTransform, doTransform, loadArchive || loadVariant); StudyConfiguration updatedStudyConfiguration = variantStorageManager.getDBAdaptor().getStudyConfigurationManager().getStudyConfiguration(studyConfiguration.getStudyId(), null).first(); if (updatedStudyConfiguration != null) { studyConfiguration.copy(updatedStudyConfiguration); } return variantStorageManager.readVariantFileMetadata(doTransform ? etlResult.getTransformResult() : etlResult.getInput()); }