/**
 * Builds the map of parameter names to value placeholders.
 *
 * <p>A {@link LinkedHashMap} is used, so entries iterate in the order they are added below.
 *
 * @return map from parameter name to a placeholder describing its expected value
 */
@Override
protected Map<String, String> getParams() {
    // The study id and region parameters are exposed with a leading "--".
    String studyIdArg = "--" + HadoopVariantStorageEngine.STUDY_ID;
    String regionArg = "--" + VariantQueryParam.REGION.key();

    Map<String, String> usage = new LinkedHashMap<>();
    usage.put(studyIdArg, "<integer>*");
    usage.put(regionArg, "<region>");
    usage.put(OUTPUT_FILE, "<path>");
    usage.put(BATCH_SIZE, "<integer>");
    return usage;
}
/**
 * Renders this parameter as {@code <key> [<type>] : <description>}.
 *
 * @return the formatted text
 */
@Override
public String toString() {
    StringBuilder text = new StringBuilder();
    text.append(key());
    text.append(" [").append(type()).append("] : ");
    text.append(description());
    return text.toString();
}
/**
 * Collects every parameter for which {@code isValidParam(query, param)} holds.
 *
 * @param query query to inspect; a {@code null} query is passed through to {@code isValidParam}
 * @return a new mutable set with the parameters considered valid for the query
 */
public static Set<VariantQueryParam> validParams(Query query) {
    // Use the query size as the initial capacity hint; zero when there is no query.
    int capacity = 0;
    if (query != null) {
        capacity = query.size();
    }

    Set<VariantQueryParam> valid = new HashSet<>(capacity);
    for (VariantQueryParam candidate : values()) {
        if (!isValidParam(query, candidate)) {
            continue;
        }
        valid.add(candidate);
    }
    return valid;
}
/**
 * Stores a genotype filter in the query where every listed genotype of every sample is negated.
 *
 * <p>Each map entry becomes {@code <sample> IS NOT<gt1> AND NOT<gt2> ...}, and the per-sample
 * filters are joined with {@code AND}.
 *
 * <p>The previous implementation concatenated {@code NOT} with the genotype list itself, so the
 * filter contained the {@code List.toString()} form (for example {@code [0/1, 1/1]}) instead of
 * individually negated genotypes.
 *
 * <p>NOTE(review): the separator between the negated genotypes of one sample is kept as
 * {@code AND}, as written in the original call; confirm this matches the genotype filter grammar.
 *
 * @param genotypes genotypes to exclude, keyed by sample
 * @param query     query that receives the genotype filter under {@code VariantQueryParam.GENOTYPE}
 */
private void putGenotypesNegated(Map<String, List<String>> genotypes, Query query) {
    List<String> sampleFilters = genotypes.entrySet().stream()
            .map(entry -> {
                // Negate each genotype on its own before joining them.
                List<String> negated = entry.getValue().stream()
                        .map(genotype -> NOT + genotype)
                        .collect(Collectors.toList());
                return entry.getKey() + IS + StringUtils.join(negated, AND);
            })
            .collect(Collectors.toList());
    query.put(VariantQueryParam.GENOTYPE.key(), StringUtils.join(sampleFilters, AND));
}
/**
 * Builds a {@link Query} containing only the recognised variant query entries of the given options.
 *
 * <p>An entry is copied when its key is present in {@code queryOptions} and is either the key of a
 * {@link VariantQueryParam} value or one of the catalog keys listed below. Keys of
 * {@link VariantQueryParam} are copied first, then the catalog keys in the listed order.
 *
 * @param queryOptions source options; must not be {@code null}
 * @return a new query with the copied entries
 */
public static Query getVariantQuery(Map<String, ?> queryOptions) {
    Query query = new Query();

    for (VariantQueryParam param : VariantQueryParam.values()) {
        String key = param.key();
        if (queryOptions.containsKey(key)) {
            query.put(key, queryOptions.get(key));
        }
    }

    // Catalog-level parameters that are accepted in addition to the storage parameters.
    String[] catalogKeys = {
            VariantCatalogQueryUtils.SAMPLE_ANNOTATION.key(),
            VariantCatalogQueryUtils.PROJECT.key(),
            VariantCatalogQueryUtils.FAMILY.key(),
            VariantCatalogQueryUtils.FAMILY_PHENOTYPE.key(),
            VariantCatalogQueryUtils.MODE_OF_INHERITANCE.key(),
            VariantCatalogQueryUtils.PANEL.key(),
    };
    for (String key : catalogKeys) {
        if (queryOptions.containsKey(key)) {
            query.put(key, queryOptions.get(key));
        }
    }

    return query;
}
/**
 * Returns the job operation name: {@code prepare_fill_missing}, followed by {@code _<region>}
 * when a non-empty region is configured.
 *
 * @return the operation name
 */
@Override
protected String getJobOperationName() {
    String configuredRegion = getConf().get(VariantQueryParam.REGION.key());

    String operationName = "prepare_fill_missing";
    if (StringUtils.isNotEmpty(configuredRegion)) {
        operationName = operationName + "_" + configuredRegion;
    }
    return operationName;
}
for (VariantQueryParam acceptedValue : VariantQueryParam.values()) { if (options.get(acceptedValue.key()) != null) { query.put(acceptedValue.key(), options.get(acceptedValue.key())); List<Region> regions = Region.parseRegions(options.getString(VariantQueryParam.REGION.key())); regions = regions == null ? Collections.emptyList() : regions; int regionsSize = regions.stream().reduce(0, (size, r) -> size += r.getEnd() - r.getStart(), (a, b) -> a + b);
/**
 * Convenience overload: builds a query from the given variants, study and samples, then delegates
 * to {@code getSamplesInAnyVariants(Query, List)}.
 *
 * @param variants  variants to look up; each element is converted with {@code toString()}
 * @param study     study used both as filter and as the included study
 * @param samples   samples to include
 * @param genotypes genotypes forwarded unchanged to the query-based overload
 * @return the result of the query-based overload
 */
public Map<String, Set<Variant>> getSamplesInAnyVariants(List<?> variants, String study, List<String> samples,
                                                         List<String> genotypes) {
    List<String> variantIds = variants.stream()
            .map(variant -> variant.toString())
            .collect(Collectors.toList());

    Query byVariantIds = new Query(VariantQueryParam.ID.key(), variantIds)
            .append(VariantQueryParam.STUDY.key(), study)
            .append(VariantQueryParam.INCLUDE_STUDY.key(), study)
            .append(VariantQueryParam.INCLUDE_SAMPLE.key(), samples);

    return getSamplesInAnyVariants(byVariantIds, genotypes);
}
/**
 * Convenience overload: builds a query from the given variants, study and samples, then delegates
 * to {@code getSamplesInAllVariants(Query, List)}.
 *
 * @param variants  variants to look up; each element is converted with {@code toString()}
 * @param study     study used both as filter and as the included study
 * @param samples   samples to include
 * @param genotypes genotypes forwarded unchanged to the query-based overload
 * @return the result of the query-based overload
 */
public Collection<String> getSamplesInAllVariants(List<?> variants, String study, List<String> samples,
                                                  List<String> genotypes) {
    List<String> variantIds = variants.stream()
            .map(variant -> variant.toString())
            .collect(Collectors.toList());

    Query byVariantIds = new Query(VariantQueryParam.ID.key(), variantIds)
            .append(VariantQueryParam.STUDY.key(), study)
            .append(VariantQueryParam.INCLUDE_STUDY.key(), study)
            .append(VariantQueryParam.INCLUDE_SAMPLE.key(), samples);

    return getSamplesInAllVariants(byVariantIds, genotypes);
}
private String[] getStudies(Query query) { // Sanity check for QUAL and FILTER, only one study is permitted, but multiple files String[] studies = null; if (StringUtils.isNotEmpty(query.getString(VariantQueryParam.STUDY.key()))) { studies = query.getString(VariantQueryParam.STUDY.key()).split("[,;]"); for (int i = 0; i < studies.length; i++) { studies[i] = VariantSearchToVariantConverter.studyIdToSearchModel(studies[i]); } } return studies; }
/**
 * Returns the samples metadata only when the query asks for it.
 *
 * @param query                     query whose {@code SAMPLE_METADATA} flag is read (default {@code false})
 * @param options                   options forwarded to {@code getSamplesMetadata}
 * @param studyConfigurationManager manager forwarded to {@code getSamplesMetadata}
 * @return the result of {@code getSamplesMetadata}, or {@code null} when the flag is not set
 */
public static Map<String, List<String>> getSamplesMetadataIfRequested(Query query, QueryOptions options,
                                                                      StudyConfigurationManager studyConfigurationManager) {
    boolean metadataRequested = query.getBoolean(SAMPLE_METADATA.key(), false);
    if (!metadataRequested) {
        return null;
    }
    return getSamplesMetadata(query, options, studyConfigurationManager);
}
/**
 * Reads this component's settings from the configuration after the parent class has parsed its own.
 *
 * <ul>
 *   <li>{@code region}: the region key, falling back to the same key prefixed with {@code "--"}.</li>
 *   <li>{@code batchSize}: {@code BATCH_SIZE}, default 1000.</li>
 *   <li>{@code outputPath}: {@code OUTPUT_FILE}; when empty, {@code saturation.<time>.json}.</li>
 *   <li>{@code caching}: the HBase scan caching setting, default 50.</li>
 * </ul>
 *
 * @throws IOException declared by the overridden method
 */
@Override
protected void parseAndValidateParameters() throws IOException {
    super.parseAndValidateParameters();

    String regionKey = VariantQueryParam.REGION.key();
    String dashedRegionValue = getConf().get("--" + regionKey);
    region = getConf().get(regionKey, dashedRegionValue);

    batchSize = getConf().getInt(BATCH_SIZE, 1000);

    String configuredOutput = getConf().get(OUTPUT_FILE);
    outputPath = StringUtils.isEmpty(configuredOutput)
            ? "saturation." + TimeUtils.getTime() + ".json"
            : configuredOutput;

    caching = getConf().getInt(HadoopVariantStorageEngine.MAPREDUCE_HBASE_SCAN_CACHING, 50);
}
/**
 * Runs the given query with its study filter replaced by all {@code studyIds} joined with
 * {@code VariantQueryUtils.AND}.
 *
 * <p>The caller's query is copied first, so it is not modified by this method.
 *
 * @param query        base query
 * @param queryOptions options forwarded to {@code get}
 * @param studyIds     studies to combine in the study filter
 * @param sessionId    session forwarded to {@code get}
 * @return the result of {@code get} for the rewritten query
 * @throws CatalogException       propagated from {@code get}
 * @throws IOException            propagated from {@code get}
 * @throws StorageEngineException propagated from {@code get}
 */
public VariantQueryResult<Variant> intersect(Query query, QueryOptions queryOptions, List<String> studyIds, String sessionId)
        throws CatalogException, IOException, StorageEngineException {
    Query allStudiesQuery = new Query(query);
    String studyFilter = String.join(VariantQueryUtils.AND, studyIds);
    allStudiesQuery.put(VariantQueryParam.STUDY.key(), studyFilter);
    return get(allStudiesQuery, queryOptions, sessionId);
}
/**
 * Gathers the regions named by the {@code REGION} and {@code ANNOT_GENE_REGIONS} filters and
 * returns them after passing the combined list through {@code mergeRegions}.
 *
 * @param query query to read the region filters from
 * @return the value returned by {@code mergeRegions} for the collected regions
 */
private List<Region> getRegions(Query query) {
    List<Region> collected = new ArrayList<>();

    if (isValidParam(query, REGION)) {
        String regionFilter = query.getString(REGION.key());
        collected.addAll(Region.parseRegions(regionFilter));
    }

    if (isValidParam(query, ANNOT_GENE_REGIONS)) {
        String geneRegionFilter = query.getString(ANNOT_GENE_REGIONS.key());
        collected.addAll(Region.parseRegions(geneRegionFilter));
    }

    return mergeRegions(collected);
}
/**
 * Smoke test: parses several queries that combine a release filter with a sample, included
 * sample, file, stats or genotype filter, and prints each parsed query as JSON.
 * It contains no assertions; it only fails if parsing throws.
 */
@Test
public void queriesWithRelease() throws Exception {
    Query[] queries = {
            new Query(STUDY.key(), "s1")
                    .append(VariantQueryParam.SAMPLE.key(), "sample2")
                    .append(VariantQueryParam.RELEASE.key(), 2),
            new Query(STUDY.key(), "s1")
                    .append(VariantQueryParam.SAMPLE.key(), "sample2")
                    .append(VariantQueryParam.RELEASE.key(), 1),
            new Query(STUDY.key(), "s1")
                    .append(VariantQueryParam.INCLUDE_SAMPLE.key(), "sample2")
                    .append(VariantQueryParam.RELEASE.key(), 1),
            new Query(STUDY.key(), "s1")
                    .append(VariantQueryParam.FILE.key(), "file1.vcf")
                    .append(VariantQueryParam.RELEASE.key(), 1),
            new Query(STUDY.key(), "s1")
                    .append(VariantQueryParam.STATS_MAF.key(), "c1>0.1")
                    .append(VariantQueryParam.RELEASE.key(), 1),
            new Query(STUDY.key(), "s1")
                    .append(VariantQueryParam.GENOTYPE.key(), "sample1:HOM_ALT,sample2:HET_REF")
                    .append(VariantQueryParam.RELEASE.key(), 1),
    };

    for (Query input : queries) {
        System.out.println(queryUtils.parseQuery(input, sessionId).toJson());
    }
}
/**
 * A sample filter must produce a genotype condition on the sample (0/1 or 1/1) together with
 * a file-id condition in the generated Mongo query.
 */
@Test
public void testQuerySampleAddFile() {
    Query input = new Query()
            .append(STUDY.key(), "study_1")
            .append(SAMPLE.key(), "sample_10101");
    Document mongoQuery = parser.parseQuery(input);

    String genotypesPath = STUDIES_FIELD + '.' + GENOTYPES_FIELD;
    Document anyOfGenotypes = new Document("$or", Arrays.asList(
            new Document(genotypesPath + ".0/1", 10101),
            new Document(genotypesPath + ".1/1", 10101)));

    Document expected = new Document(STUDIES_FIELD + '.' + STUDYID_FIELD, 1);
    expected.append("$and", Collections.singletonList(anyOfGenotypes));
    expected.append(STUDIES_FIELD + '.' + FILES_FIELD + '.' + FILEID_FIELD, 1);

    checkEqualDocuments(expected, mongoQuery);
}
@Test public void testQueryUnknownGenotypesNotAddFiles() { // FILES filter should not be used when the genotype filter is the unknown genotype Document mongoQuery = parser.parseQuery(new Query().append(STUDY.key(), "study_1").append(GENOTYPE.key(), "sample_10101" + IS + GenotypeClass.UNKNOWN_GENOTYPE)); Document expected = new Document(STUDIES_FIELD + '.' + STUDYID_FIELD, 1) .append("$and", Collections.singletonList( new Document("$or", Arrays.asList( new Document(STUDIES_FIELD + '.' + GENOTYPES_FIELD + ".?/?", 10101))))); checkEqualDocuments(expected, mongoQuery); }
/**
 * Asserts, for two known variants, that the first study has no secondary alternates and that
 * the checked samples report the genotype {@code ./.}.
 *
 * <p>Both variants are fetched by id with the unknown-genotype parameter set to {@code "?"}.
 *
 * @param dbAdaptor adaptor used to fetch the variants
 */
public void checkNewMissingPositions(VariantHadoopDBAdaptor dbAdaptor) {
    Query firstQuery = new Query(VariantQueryParam.ID.key(), "1:10821:T:A")
            .append(VariantQueryParam.UNKNOWN_GENOTYPE.key(), "?");
    Variant firstVariant = dbAdaptor.get(firstQuery, null).first();
    assertEquals(0, firstVariant.getStudies().get(0).getSecondaryAlternates().size());
    assertEquals("./.", firstVariant.getStudies().get(0).getSampleData("NA12878", "GT"));
    assertEquals("./.", firstVariant.getStudies().get(0).getSampleData("NA12880", "GT"));

    Query secondQuery = new Query(VariantQueryParam.ID.key(), "1:10635:C:G")
            .append(VariantQueryParam.UNKNOWN_GENOTYPE.key(), "?");
    Variant secondVariant = dbAdaptor.get(secondQuery, null).first();
    assertEquals(0, secondVariant.getStudies().get(0).getSecondaryAlternates().size());
    assertEquals("./.", secondVariant.getStudies().get(0).getSampleData("NA12880", "GT"));
}
/**
 * A panel filter must be expanded into the panel's genes, and a gene given explicitly alongside
 * the panel must be appended after them.
 */
@Test
public void queryByPanel() throws Exception {
    Query panelOnly = new Query(STUDY.key(), "s1").append(PANEL.key(), "MyPanel");
    Query parsedPanelOnly = queryUtils.parseQuery(panelOnly, sessionId);
    assertEquals(Arrays.asList("BRCA2", "CADM1", "CTBP2P1", "ADSL"),
            parsedPanelOnly.getAsList(GENE.key()));

    Query panelAndGene = new Query(STUDY.key(), "s1")
            .append(PANEL.key(), "MyPanel")
            .append(GENE.key(), "ASDF");
    Query parsedPanelAndGene = queryUtils.parseQuery(panelAndGene, sessionId);
    assertEquals(Arrays.asList("BRCA2", "CADM1", "CTBP2P1", "ADSL", "ASDF"),
            parsedPanelAndGene.getAsList(GENE.key()));
}