private long processAncestorsAtDistance(int localDistance, Set<VocabularyTerm> sourceUnprocessedAncestors, Map<String, Integer> sourceDistanceMap, Map<String, Integer> targetDistanceMap) { long minDistance = Long.MAX_VALUE; Set<VocabularyTerm> nextLevel = new HashSet<>(); for (VocabularyTerm term : sourceUnprocessedAncestors) { for (VocabularyTerm parent : term.getParents()) { if (sourceDistanceMap.containsKey(parent.getId())) { continue; } if (targetDistanceMap.containsKey(parent.getId())) { minDistance = Math.min(minDistance, targetDistanceMap.get(parent.getId()) + localDistance); } nextLevel.add(parent); sourceDistanceMap.put(parent.getId(), localDistance); } } sourceUnprocessedAncestors.clear(); sourceUnprocessedAncestors.addAll(nextLevel); return minDistance; }
/** * How much information is captured by a feature? In other words, how many diseases are selected by a feature out of * the total selectable diseases. If a feature doesn't select any diseases at all, the information content of its * nearest represented ancestor is considered, with a slight boost for even more specificity. * * @param f the target feature to measure * @return the information content captured by this term */ private double informationContent(Feature f) { String toSearch = f.getId(); double ic = informationContent(this.omim.count(Collections.singletonMap(SEARCH_FOR, toSearch))); int i = 0; while (ic == 0 && ++i < 5) { VocabularyTerm term = this.hpo.getTerm(toSearch); if (term == null) { break; } Set<VocabularyTerm> parents = term.getParents(); if (parents.isEmpty()) { break; } toSearch = parents.iterator().next().getId(); ic = informationContent(this.omim.count(Collections.singletonMap(SEARCH_FOR, toSearch))); } return ic * (1 + i / 5); }
/** * How much information is captured by a feature? In other words, how many diseases are selected by a feature out of * the total selectable diseases. If a feature doesn't select any diseases at all, the information content of its * nearest represented ancestor is considered, with a slight boost for even more specificity. * * @param f the target feature to measure * @return the information content captured by this term */ private double informationContent(Feature f) { String toSearch = f.getId(); double ic = informationContent(this.omim.count(Collections.singletonMap(SEARCH_FOR, toSearch))); int i = 0; while (ic == 0 && ++i < 5) { VocabularyTerm term = this.hpo.getTerm(toSearch); if (term == null) { break; } Set<VocabularyTerm> parents = term.getParents(); if (parents.isEmpty()) { break; } toSearch = parents.iterator().next().getId(); ic = informationContent(this.omim.count(Collections.singletonMap(SEARCH_FOR, toSearch))); } return ic * (1 + i / 5); }