/**
 * Counts the result entities of a SORTA job execution that fall on one side of the job's
 * threshold.
 *
 * <p>When {@code isMatched} is {@code true} the query is {@code VALIDATED == true OR SCORE >=
 * threshold}; when it is {@code false} the query is {@code VALIDATED == false AND SCORE <
 * threshold}.
 *
 * @param sortaJobExecution job execution supplying the threshold and the result entity name
 * @param isMatched which side of the threshold to count
 * @return the number of entities returned by the count query
 */
private long countMatchedEntities(SortaJobExecution sortaJobExecution, boolean isMatched) {
  double threshold = sortaJobExecution.getThreshold();

  QueryRule validatedRule =
      new QueryRule(MatchingTaskContentMetaData.VALIDATED, EQUALS, isMatched);

  // The score comparison and the junction between the two rules flip together.
  QueryRule scoreRule;
  QueryRule junctionRule;
  if (isMatched) {
    scoreRule = new QueryRule(MatchingTaskContentMetaData.SCORE, GREATER_EQUAL, threshold);
    junctionRule = new QueryRule(OR);
  } else {
    scoreRule = new QueryRule(MatchingTaskContentMetaData.SCORE, LESS, threshold);
    junctionRule = new QueryRule(AND);
  }

  QueryRule nestedRule = new QueryRule(asList(validatedRule, junctionRule, scoreRule));
  return dataService.count(
      sortaJobExecution.getResultEntityName(), new QueryImpl<>(nestedRule));
}
/**
 * Builds a DIS_MAX query rule from a list of query terms.
 *
 * <p>Empty terms are skipped. Every remaining term is passed through
 * {@code escapeCharsExcludingCaretChar} and contributes two nested FUZZY_MATCH rules: one on the
 * label attribute and one on the description attribute.
 *
 * @param queryTerms the terms to match
 * @return a query rule with operator DIS_MAX wrapping the per-term fuzzy match rules
 */
public QueryRule createDisMaxQueryRuleForTerms(List<String> queryTerms)
{
	List<QueryRule> fuzzyRules = new ArrayList<QueryRule>();
	for (String term : queryTerms)
	{
		if (!StringUtils.isNotEmpty(term))
		{
			continue;
		}
		String escapedTerm = escapeCharsExcludingCaretChar(term);
		fuzzyRules.add(new QueryRule(AttributeMetadata.LABEL, Operator.FUZZY_MATCH, escapedTerm));
		fuzzyRules.add(new QueryRule(AttributeMetadata.DESCRIPTION, Operator.FUZZY_MATCH, escapedTerm));
	}

	QueryRule disMaxRule = new QueryRule(fuzzyRules);
	disMaxRule.setOperator(Operator.DIS_MAX);
	return disMaxRule;
}
/**
 * Builds a DIS_MAX query rule from a list of query terms.
 *
 * <p>Empty terms are ignored. Each remaining term is escaped with {@code
 * escapeCharsExcludingCaretChar} and then matched fuzzily against both the label and the
 * description attribute.
 *
 * @param queryTerms the terms to match
 * @return a query rule with operator DIS_MAX wrapping the per-term fuzzy match rules
 */
public QueryRule createDisMaxQueryRuleForTerms(List<String> queryTerms) {
  List<QueryRule> nestedRules = new ArrayList<>();
  for (String queryTerm : queryTerms) {
    if (StringUtils.isNotEmpty(queryTerm)) {
      String escaped = escapeCharsExcludingCaretChar(queryTerm);
      nestedRules.add(new QueryRule(AttributeMetadata.LABEL, Operator.FUZZY_MATCH, escaped));
      nestedRules.add(
          new QueryRule(AttributeMetadata.DESCRIPTION, Operator.FUZZY_MATCH, escaped));
    }
  }

  QueryRule disMaxQueryRule = new QueryRule(nestedRules);
  disMaxQueryRule.setOperator(Operator.DIS_MAX);
  return disMaxQueryRule;
}
if (ontologyEntity != null) { QueryRule queryRule = new QueryRule( Arrays.asList( new QueryRule( OntologyTermMetaData.ONTOLOGY_TERM_SYNONYM, Operator.FUZZY_MATCH, queryRule.setOperator(Operator.DIS_MAX); QueryRule finalQuery = new QueryRule( Arrays.asList( new QueryRule( OntologyTermMetaData.ONTOLOGY, Operator.EQUALS, ontologyEntity), new QueryRule(Operator.AND), queryRule)); long wordCount = dataService.count(ONTOLOGY_TERM, new QueryImpl<>(finalQuery));
/**
 * Searches the protocol tree index of a catalogue for items matching all terms of a query string.
 *
 * <p>The request body must contain {@code catalogId} and {@code queryString}. The query string is
 * split on runs of whitespace; every non-empty term becomes a SEARCH rule and the rules are joined
 * with AND. A query string without any term results in a query without rules.
 *
 * @param request request body holding {@code catalogId} and {@code queryString}
 * @return the search result, or a {@link SearchResult} carrying an error message when a required
 *         request value is missing
 */
@RequestMapping(value = "/search", method = RequestMethod.POST)
@ResponseBody
public SearchResult searchItems(@RequestBody Map<String, Object> request)
{
	Object catalogId = request.get("catalogId");
	Object queryString = request.get("queryString");
	if (catalogId == null)
	{
		return new SearchResult("The catalogID cannot be null");
	}
	if (queryString == null)
	{
		return new SearchResult("The queryString items cannot be null");
	}

	List<QueryRule> rules = new ArrayList<QueryRule>();
	// Split on one-or-more whitespace characters. The former pattern "\\s*" also matches the empty
	// string, which split the input between every single character.
	for (String term : queryString.toString().trim().split("\\s+"))
	{
		if (term.isEmpty())
		{
			// split() yields a single empty token for an empty input string
			continue;
		}
		if (!rules.isEmpty())
		{
			rules.add(new QueryRule(Operator.AND));
		}
		rules.add(new QueryRule(Operator.SEARCH, term));
	}

	return searchService.search(new SearchRequest("protocolTree-" + catalogId.toString(), new QueryImpl(rules)
			.pageSize(Integer.MAX_VALUE), null));
}
/**
 * Looks up items in the protocol tree index of a catalogue by their ids.
 *
 * <p>The request body must contain {@code catalogId} and {@code items}. When {@code items} is a
 * list, every element contributes an {@code id EQUALS element.toString()} rule and the rules are
 * joined with OR. When it is not a list, the query is executed without rules.
 *
 * @param request request body holding {@code catalogId} and {@code items}
 * @return the search result, or a {@link SearchResult} carrying an error message when a required
 *         request value is missing
 */
@RequestMapping(value = "/items", method = RequestMethod.POST)
@ResponseBody
public SearchResult getItemsFromIndex(@RequestBody Map<String, Object> request)
{
	Object catalogId = request.get("catalogId");
	if (catalogId == null)
	{
		return new SearchResult("The catalogID cannot be null");
	}
	Object draftItems = request.get("items");
	if (draftItems == null)
	{
		return new SearchResult("The selected items cannot be null");
	}

	List<QueryRule> idRules = new ArrayList<QueryRule>();
	if (draftItems instanceof List<?>)
	{
		List<?> selectedItems = (List<?>) draftItems;
		for (Object selectedItem : selectedItems)
		{
			if (!idRules.isEmpty())
			{
				idRules.add(new QueryRule(Operator.OR));
			}
			idRules.add(new QueryRule("id", Operator.EQUALS, selectedItem.toString()));
		}
	}

	String documentType = "protocolTree-" + catalogId.toString();
	return searchService.search(new SearchRequest(documentType, new QueryImpl(idRules)
			.pageSize(Integer.MAX_VALUE), null));
}
private void lexicalMatchOntologyTerms( String ontologyIri, Entity inputEntity, Entity ontologyEntity, int pageSize, List<QueryRule> rulesForOntologyTermFields, List<Entity> relevantEntities) { QueryRule disMaxQueryRule = new QueryRule(rulesForOntologyTermFields); disMaxQueryRule.setOperator(DIS_MAX); List<QueryRule> finalQueryRules = Arrays.asList( new QueryRule(OntologyTermMetaData.ONTOLOGY, EQUALS, ontologyEntity), new QueryRule(AND), disMaxQueryRule); Stream<Entity> lexicalMatchedOntologyTermEntities = dataService .findAll(ONTOLOGY_TERM, new QueryImpl<>(finalQueryRules).pageSize(pageSize)) .map( ontologyTerm -> addLexicalScoreToMatchedEntity( inputEntity, ontologyTerm, ontologyIri)); // TODO use findAll(ONTOLOGY_TERM, ..., OntologyTerm.class) lexicalMatchedOntologyTermEntities.forEach( matchedEntity -> { if (!relevantEntities.contains(matchedEntity)) { relevantEntities.add(matchedEntity); } }); }
/**
 * Searches for ontology terms whose synonym equals one of the given strings.
 *
 * <p>For a non-empty input the query consists of one {@code ONTOLOGYTERM_SYNONYM EQUALS value}
 * rule per string joined with OR, followed by {@code AND ENTITY_TYPE EQUALS "ontologyTerm"}, with
 * a page size of 100. For an empty input the query is executed without rules.
 *
 * @param queryStrings synonym values to look for
 * @return the result of the search
 */
public SearchResult findOntologyTerm(List<String> queryStrings)
{
	QueryImpl query = new QueryImpl();
	if (!queryStrings.isEmpty())
	{
		for (String synonym : queryStrings)
		{
			boolean hasPrecedingRule = !query.getRules().isEmpty();
			if (hasPrecedingRule)
			{
				query.addRule(new QueryRule(Operator.OR));
			}
			query.addRule(new QueryRule(ONTOLOGYTERM_SYNONYM, Operator.EQUALS, synonym));
		}

		// restrict the hits to documents of type "ontologyTerm"
		query.addRule(new QueryRule(Operator.AND));
		query.addRule(new QueryRule(ENTITY_TYPE, Operator.EQUALS, "ontologyTerm"));
		query.pageSize(100);
	}

	SearchRequest searchRequest = new SearchRequest(null, query, null);
	return searchService.search(searchRequest);
}
/**
 * Retrieves the ontology terms of an ontology that qualify as children of a node path.
 *
 * <p>Ontology terms are queried with a FUZZY_MATCH on the quoted node path combined with an
 * equality check on the ontology. The hits are then filtered with {@code qualifiedNodePath} and
 * converted with {@code OntologyTermRepository.toOntologyTerm}.
 *
 * @param ontologyEntity the ontology the terms must belong to
 * @param nodePathEntity entity whose NODE_PATH value is used as the parent node path
 * @return a new list holding the converted ontology terms that passed the filter
 */
public List<OntologyTerm> getChildOntologyTermsByNodePath(
    Entity ontologyEntity, Entity nodePathEntity) {
  String nodePath = nodePathEntity.getString(OntologyTermNodePathMetaData.NODE_PATH);

  // The query is only executed when the iterable is actually iterated.
  Iterable<Entity> candidateEntities =
      () -> {
        QueryRule nodePathRule =
            new QueryRule(
                OntologyTermMetaData.ONTOLOGY_TERM_NODE_PATH,
                Operator.FUZZY_MATCH,
                "\"" + nodePath + "\"");
        return dataService
            .findAll(
                OntologyTermMetaData.ONTOLOGY_TERM,
                new QueryImpl<>(nodePathRule)
                    .and()
                    .eq(OntologyTermMetaData.ONTOLOGY, ontologyEntity))
            .iterator();
      };

  List<Entity> childEntities =
      FluentIterable.from(candidateEntities)
          .filter(candidate -> qualifiedNodePath(nodePath, candidate))
          .toList();

  List<OntologyTerm> childOntologyTerms = Lists.newArrayList();
  for (Entity childEntity : childEntities) {
    childOntologyTerms.add(OntologyTermRepository.toOntologyTerm(childEntity));
  }
  return childOntologyTerms;
}
/**
 * Collects the lower-cased synonyms of the ontology term represented by a search hit.
 *
 * <p>The ontology term IRI is read from the hit's column values and used for an equality search
 * with a page size of 100000. The ONTOLOGYTERM_SYNONYM value of every resulting hit is
 * lower-cased and added to the result.
 *
 * @param ontologyTermHit hit whose ONTOLOGY_TERM_IRI column identifies the ontology term
 * @return the set of lower-cased synonyms found
 */
public Set<String> findOntologyTermSynonyms(Hit ontologyTermHit)
{
	String ontologyTermIRI = ontologyTermHit.getColumnValueMap().get(ONTOLOGY_TERM_IRI).toString();

	QueryImpl iriQuery = new QueryImpl();
	iriQuery.addRule(new QueryRule(ONTOLOGY_TERM_IRI, Operator.EQUALS, ontologyTermIRI));
	iriQuery.pageSize(100000);
	SearchResult searchResult = searchService.search(new SearchRequest(null, iriQuery, null));

	Set<String> lowerCasedSynonyms = new HashSet<String>();
	for (Hit synonymHit : searchResult.getSearchHits())
	{
		String synonym = synonymHit.getColumnValueMap().get(ONTOLOGYTERM_SYNONYM).toString();
		lowerCasedSynonyms.add(synonym.toLowerCase());
	}
	return lowerCasedSynonyms;
}
/**
 * Creates the query that selects the index actions of a transaction, sorted on ACTION_ORDER.
 *
 * @param transactionId value the INDEX_ACTION_GROUP_ATTR attribute must be equal to
 * @return the sorted query
 */
static Query<IndexAction> createQueryGetAllIndexActions(String transactionId) {
  QueryImpl<IndexAction> indexActionQuery =
      new QueryImpl<>(new QueryRule(INDEX_ACTION_GROUP_ATTR, EQUALS, transactionId));
  indexActionQuery.setSort(new Sort(ACTION_ORDER));
  return indexActionQuery;
}
/**
 * Looks up the index document describing the ontology with the given IRI.
 *
 * <p>The search is executed against the document type derived from the IRI and requires both the
 * entity type to be TYPE_ONTOLOGY and the ontology IRI to match.
 *
 * @param ontologyIri IRI of the ontology
 * @return the first hit found, or a hit without id and with an empty column map when there is
 *         no hit
 */
public Hit getOntologyByIri(String ontologyIri)
{
	QueryImpl ontologyQuery = new QueryImpl();
	ontologyQuery.pageSize(Integer.MAX_VALUE);
	ontologyQuery.addRule(new QueryRule(OntologyIndexRepository.ENTITY_TYPE, Operator.EQUALS,
			OntologyIndexRepository.TYPE_ONTOLOGY));
	ontologyQuery.addRule(new QueryRule(Operator.AND));
	ontologyQuery.addRule(new QueryRule(OntologyIndexRepository.ONTOLOGY_IRI, Operator.EQUALS, ontologyIri));

	SearchRequest searchRequest = new SearchRequest(AsyncOntologyIndexer.createOntologyDocumentType(ontologyIri),
			ontologyQuery, null);
	List<Hit> searchHits = searchService.search(searchRequest).getSearchHits();

	if (searchHits.isEmpty())
	{
		// no matching document: return a placeholder hit
		return new Hit(null, AsyncOntologyIndexer.createOntologyDocumentType(ontologyIri),
				Collections.<String, Object> emptyMap());
	}
	return searchHits.get(0);
}
/**
 * Finds the index hits for the ontology terms that define a feature.
 *
 * <p>The feature id is parsed from the candidate hit's column values, the feature is loaded, and
 * one {@code ONTOLOGY_TERM_IRI EQUALS termAccession} rule per definition is joined with OR.
 *
 * @param candidateFeature hit whose lower-cased ObservableFeature.ID column holds the feature id
 * @return the search hits for the resulting query
 */
private List<Hit> findOntologyTerms(Hit candidateFeature)
{
	String idColumn = ObservableFeature.ID.toLowerCase();
	Integer featureId = Integer.valueOf(candidateFeature.getColumnValueMap().get(idColumn).toString());
	ObservableFeature feature = dataService.findOne(ObservableFeature.ENTITY_NAME, featureId,
			ObservableFeature.class);

	QueryImpl definitionQuery = new QueryImpl();
	for (OntologyTerm definition : feature.getDefinitions())
	{
		if (!definitionQuery.getRules().isEmpty())
		{
			definitionQuery.addRule(new QueryRule(Operator.OR));
		}
		definitionQuery.addRule(new QueryRule(ONTOLOGY_TERM_IRI, Operator.EQUALS, definition.getTermAccession()));
	}

	SearchRequest searchRequest = new SearchRequest(null, definitionQuery, null);
	return searchService.search(searchRequest).getSearchHits();
}
/**
 * Creates a SHOULD query rule for a composite tag that refers to several ontology terms.
 *
 * <p>The input is split on {@code COMMA_CHAR}. For every resulting IRI the ontology term is
 * looked up, its query terms are derived, and a boosted DIS_MAX rule for those terms is added as
 * a nested rule.
 *
 * @param multiOntologyTermIri ontology term IRIs separated by {@code COMMA_CHAR}
 * @return a query rule with operator SHOULD holding one nested rule per ontology term IRI
 */
public QueryRule createShouldQueryRule(String multiOntologyTermIri)
{
	QueryRule shouldRule = new QueryRule(new ArrayList<QueryRule>());
	shouldRule.setOperator(Operator.SHOULD);

	String[] ontologyTermIris = multiOntologyTermIri.split(COMMA_CHAR);
	for (String iri : ontologyTermIris)
	{
		List<String> termQueries = parseOntologyTermQueries(ontologyService.getOntologyTerm(iri));
		Double boost = getBestInverseDocumentFrequency(termQueries);
		QueryRule boostedDisMaxRule = createBoostedDisMaxQueryRuleForTerms(termQueries, boost);
		shouldRule.getNestedRules().add(boostedDisMaxRule);
	}
	return shouldRule;
}
/**
 * Creates a SHOULD query rule for a composite tag that refers to several ontology terms.
 *
 * <p>The input is split on {@code COMMA_CHAR}; each IRI is resolved to an ontology term whose
 * query terms are turned into a boosted DIS_MAX rule, which is added as a nested rule.
 *
 * @param multiOntologyTermIri ontology term IRIs separated by {@code COMMA_CHAR}
 * @return a query rule with operator SHOULD holding one nested rule per ontology term IRI
 */
public QueryRule createShouldQueryRule(String multiOntologyTermIri) {
  QueryRule shouldRule = new QueryRule(new ArrayList<>());
  shouldRule.setOperator(Operator.SHOULD);

  String[] ontologyTermIris = multiOntologyTermIri.split(COMMA_CHAR);
  for (String iri : ontologyTermIris) {
    List<String> termQueries = parseOntologyTermQueries(ontologyService.getOntologyTerm(iri));
    Double boost = getBestInverseDocumentFrequency(termQueries);
    QueryRule boostedDisMaxRule = createBoostedDisMaxQueryRuleForTerms(termQueries, boost);
    shouldRule.getNestedRules().add(boostedDisMaxRule);
  }
  return shouldRule;
}
/**
 * Retrieves all ontologies from the index.
 *
 * <p>Searches for documents whose entity type equals TYPE_ONTOLOGY and maps every hit to an
 * {@link Ontology}. The hit's ONTOLOGY_IRI value is used for both the identifier and the
 * ontology URI; the ONTOLOGY_NAME value is used as the name.
 *
 * @return a list with one ontology per hit
 */
public List<Ontology> getAllOntologies()
{
	QueryImpl ontologyTypeQuery = new QueryImpl();
	ontologyTypeQuery.pageSize(Integer.MAX_VALUE);
	ontologyTypeQuery.addRule(new QueryRule(OntologyIndexRepository.ENTITY_TYPE, Operator.EQUALS,
			OntologyIndexRepository.TYPE_ONTOLOGY));
	SearchRequest searchRequest = new SearchRequest(null, ontologyTypeQuery, null);

	List<Ontology> ontologies = new ArrayList<Ontology>();
	for (Hit hit : searchService.search(searchRequest).getSearchHits())
	{
		String ontologyIri = hit.getColumnValueMap().get(OntologyIndexRepository.ONTOLOGY_IRI).toString();
		String ontologyName = hit.getColumnValueMap().get(OntologyIndexRepository.ONTOLOGY_NAME).toString();

		Ontology ontology = new Ontology();
		ontology.setIdentifier(ontologyIri);
		ontology.setOntologyURI(ontologyIri);
		ontology.setName(ontologyName);
		ontologies.add(ontology);
	}
	return ontologies;
}
/**
 * Searches the catalogue index of a data set for all documents of the feature entity type.
 *
 * <p>The entity type value is the lower-cased simple class name of {@link ObservableFeature}. The
 * document type is CATALOGUE_PREFIX followed by the id of the protocol used by the data set. The
 * page size is 1000000.
 *
 * @param targetDataSetId id of the data set whose catalogue is searched
 * @return the search result
 */
private SearchResult findAllFeatures(Integer targetDataSetId)
{
	String featureEntityType = ObservableFeature.class.getSimpleName().toLowerCase();
	QueryImpl featureQuery = new QueryImpl();
	featureQuery.addRule(new QueryRule(ENTITY_TYPE, Operator.EQUALS, featureEntityType));
	featureQuery.pageSize(1000000);

	DataSet dataSet = dataService.findOne(DataSet.ENTITY_NAME, targetDataSetId, DataSet.class);
	SearchRequest searchRequest = new SearchRequest(CATALOGUE_PREFIX + dataSet.getProtocolUsed().getId(),
			featureQuery, null);
	return searchService.search(searchRequest);
}
/**
 * Collects the processed unit names belonging to an ontology term.
 *
 * <p>Searches the UNIT_DOCUMENT_TYPE documents whose ontology term IRI equals the term's
 * accession (page size 10000). The lower-cased ONTOLOGYTERM_SYNONYM value of every hit is passed
 * through {@code processUnitName} and collected. The processed name of the term itself is added
 * when the collected set does not contain the term's name.
 *
 * @param ot the ontology term to collect unit names for
 * @return the set of processed unit names
 */
private Set<String> retrieveUnits(OntologyTerm ot)
{
	QueryImpl unitQuery = new QueryImpl();
	unitQuery.pageSize(10000);
	unitQuery.addRule(new QueryRule(ONTOLOGY_TERM_IRI, Operator.EQUALS, ot.getTermAccession()));
	SearchResult unitResult = searchService.search(new SearchRequest(UNIT_DOCUMENT_TYPE, unitQuery, null));

	Set<String> unitNames = new HashSet<String>();
	for (Hit unitHit : unitResult.getSearchHits())
	{
		Map<String, Object> columnValues = unitHit.getColumnValueMap();
		String lowerCasedSynonym = columnValues.get(ONTOLOGYTERM_SYNONYM).toString().toLowerCase();
		unitNames.add(processUnitName(lowerCasedSynonym));
	}

	// NOTE(review): the check uses the unprocessed name while the set holds processed names -
	// confirm whether this is intended
	boolean termNamePresent = unitNames.contains(ot.getName());
	if (!termNamePresent)
	{
		unitNames.add(processUnitName(ot.getName()));
	}
	return unitNames;
}
/**
 * Runs the mapping algorithm for a target data set against a list of source data sets.
 *
 * <p>Steps, each reported through {@code currentUserStatus}:
 * <ol>
 * <li>remove existing derived data sets (stage DeleteMapping)</li>
 * <li>create the derived data sets and generate values for all features of the target data set
 * (stage CreateMapping)</li>
 * <li>index the resulting data set matrix repository (stage StoreMapping)</li>
 * </ol>
 *
 * <p>The running flag of the user is cleared when the method ends, also when one of the steps
 * throws, so a failed run does not leave the user marked as running.
 *
 * @param userName name of the user the job runs for
 * @param targetDataSetId id of the target data set
 * @param sourceDataSetIds ids of the source data sets
 */
@Async
@RunAsSystem
@Transactional
public void applyAlgorithm(String userName, Integer targetDataSetId, List<Integer> sourceDataSetIds)
{
	try
	{
		currentUserStatus.setUserCurrentStage(userName, STAGE.DeleteMapping);
		removeExistingDerivedDataSets(userName, targetDataSetId, sourceDataSetIds);

		currentUserStatus.setUserCurrentStage(userName, STAGE.CreateMapping);
		createDerivedDataSets(userName, targetDataSetId, sourceDataSetIds);

		SearchResult allFeaturesResult = findAllFeatures(targetDataSetId);
		currentUserStatus.setUserTotalNumberOfQueries(userName,
				allFeaturesResult.getTotalHitCount() * sourceDataSetIds.size());

		// one "STORE_MAPPING_FEATURE EQUALS featureId" rule per feature, joined with OR
		QueryImpl query = new QueryImpl();
		query.pageSize(Integer.MAX_VALUE);
		for (Hit hit : allFeaturesResult.getSearchHits())
		{
			if (query.getRules().size() > 0)
			{
				query.addRule(new QueryRule(Operator.OR));
			}
			query.addRule(new QueryRule(STORE_MAPPING_FEATURE, Operator.EQUALS, hit.getColumnValueMap().get(
					ENTITY_ID)));
		}
		generateValues(userName, query, targetDataSetId, sourceDataSetIds);

		currentUserStatus.setUserCurrentStage(userName, STAGE.StoreMapping);
		searchService.indexRepository(new DataSetMatrixRepository(dataService, createDerivedDataSetIdentifier(
				userName, targetDataSetId.toString(), StringUtils.join(sourceDataSetIds, '-'))));
	}
	finally
	{
		// Reset the flag on failure as well; this method runs asynchronously, so otherwise an
		// exception would leave the user marked as running.
		currentUserStatus.setUserIsRunning(userName, false);
	}
}