/**
 * Looks up the value stored under the {@link #LANGUAGE} field of this document.
 *
 * @return the result of {@code getField(LANGUAGE)}
 */
public LanguageCode getLanguage()
{
    final LanguageCode language = getField(LANGUAGE);
    return language;
}
/**
 * Tests whether the given document carries a not-yet-seen value of the field
 * named by {@code fieldName}.
 *
 * @param document the document whose field value is inspected
 * @return {@code false} when the field value is {@code null}; otherwise whatever
 *         {@code unique.add(value)} reports for that value
 */
public boolean apply(Document document)
{
    final Object value = document.getField(fieldName);
    // Short-circuit keeps null values out of the "unique" collection.
    return value != null && unique.add(value);
}
}
/**
 * Reads the {@link Document#PARTITIONS} field of the document as a list of strings
 * and returns its first element.
 *
 * @param document the document to read the partitions from
 * @return element {@code 0} of the partitions list
 */
@SuppressWarnings("unchecked")
protected static String getTopic(Document document)
{
    final List<String> partitions = (List<String>) document
        .getField(Document.PARTITIONS);
    return partitions.get(0);
}
/**
 * Looks up the value stored under the {@link #TITLE} field of this document.
 *
 * @return the result of {@code getField(TITLE)}
 */
@JsonProperty
@Element(required = false)
public String getTitle()
{
    final String title = getField(TITLE);
    return title;
}
/**
 * Looks up the value stored under the {@link #SCORE} field of this document.
 *
 * @return the result of {@code getField(SCORE)}
 */
@Attribute(name = "score", required = false)
public Double getScore()
{
    final Double score = getField(SCORE);
    return score;
}
/**
 * Collects, into one set, every element of the {@link Document#PARTITIONS} field
 * of each of the provided documents.
 *
 * @param documents the documents whose partitions are gathered
 * @return a set containing all partition values found in {@code documents}
 */
@SuppressWarnings("unchecked")
protected Set<Object> getTopicIds(final List<Document> documents)
{
    final Set<Object> ids = Sets.newHashSet();
    for (final Document doc : documents)
    {
        final Object partitions = doc.<Object> getField(Document.PARTITIONS);
        ids.addAll((Collection<? extends Object>) partitions);
    }
    return ids;
}
/**
 * Looks up the value stored under the {@link #CONTENT_URL} field of this document.
 *
 * @return the result of {@code getField(CONTENT_URL)}
 */
@JsonProperty("url")
@Element(name = "url", required = false)
public String getContentUrl()
{
    final String contentUrl = getField(CONTENT_URL);
    return contentUrl;
}
/**
 * Looks up the value stored under the {@link #SUMMARY} field of this document.
 *
 * @return the result of {@code getField(SUMMARY)}
 */
@JsonProperty("snippet")
@Element(name = "snippet", required = false)
public String getSummary()
{
    final String summary = getField(SUMMARY);
    return summary;
}
/**
 * Looks up the value stored under the {@link #SOURCES} field of this document.
 *
 * @return the result of {@code getField(SOURCES)}
 */
@JsonProperty
@ElementList(entry = "source", required = false)
public List<String> getSources()
{
    final List<String> sources = getField(SOURCES);
    return sources;
}
/**
 * Gathers the distinct partition values found in the field named by
 * {@code partitionIdFieldName} across the provided documents. Documents for which
 * that field is {@code null} contribute nothing.
 *
 * @param documents the documents to scan
 * @return a set with every partition value encountered
 */
Set<Object> getPartitions(List<Document> documents)
{
    final Set<Object> result = Sets.newHashSet();
    for (final Document doc : documents)
    {
        final Collection<Object> docPartitions = doc
            .<Collection<Object>> getField(partitionIdFieldName);
        if (docPartitions == null)
        {
            continue;
        }
        result.addAll(docPartitions);
    }
    return result;
}
/**
 * Unescape HTML entities and tags from a given set of <code>fields</code> of all
 * documents in the provided <code>response</code>. Fields whose value is blank
 * (as decided by {@code StringUtils.isNotBlank}) are left untouched.
 *
 * @param response the search engine response to clean
 * @param keepHighlights set to <code>true</code> to keep query terms highlights;
 *            when <code>false</code>, everything matching
 *            <code>HIGHLIGHTS_PATTERN</code> is removed before unescaping
 * @param fields names of fields to clean
 */
protected static void clean(SearchEngineResponse response, boolean keepHighlights,
    String... fields)
{
    for (Document document : response.results)
    {
        for (String field : fields)
        {
            final String originalField = document.getField(field);
            if (!StringUtils.isNotBlank(originalField))
            {
                continue;
            }

            String cleanedField = originalField;
            if (!keepHighlights)
            {
                cleanedField = HIGHLIGHTS_PATTERN.matcher(cleanedField).replaceAll("");
            }

            // The contract is to decode entities (e.g. "&amp;" -> "&"); escaping here
            // would double-encode text that is already HTML-escaped.
            cleanedField = StringEscapeUtils.unescapeHtml4(cleanedField);
            document.setField(field, cleanedField);
        }
    }
}
for (int i = 0; i < documents.length; i++) final String url = documents[i].getField(Document.CONTENT_URL); if (url == null)
/**
 * Divides {@code calculateH(cluster)} by the worst-case value obtained from
 * {@code calculateWorstCaseH}. The worst case is computed from the summed sizes of
 * the {@link Document#PARTITIONS} collections of all documents in the cluster and
 * from {@code partitionCount}.
 *
 * @param cluster the cluster to evaluate
 * @param partitionCount passed through to {@code calculateWorstCaseH}
 * @return {@code 0} when the worst-case value equals zero, the ratio otherwise
 */
@SuppressWarnings("unchecked")
double calculate(Cluster cluster, int partitionCount)
{
    int assignments = 0;
    for (final Document doc : cluster.getAllDocuments())
    {
        final Collection<Object> docPartitions = (Collection<Object>) doc
            .getField(Document.PARTITIONS);
        assignments += docPartitions.size();
    }

    final double worstCase = calculateWorstCaseH(assignments, partitionCount);
    // Guard against division by zero; calculateH is evaluated only when needed.
    return worstCase == 0 ? 0 : calculateH(cluster) / worstCase;
}
/**
 * Returns documents grouped by partitions. Each document is registered under every
 * value found in its field named by {@code partitionIdFieldName}. Documents for
 * which that field is {@code null} are skipped, matching how {@code getPartitions}
 * treats them.
 *
 * @param documents the documents to index
 * @return an immutable multimap from partition value to the documents carrying it
 */
SetMultimap<Object, Document> getDocumentsByPartition(List<Document> documents)
{
    final SetMultimap<Object, Document> index = HashMultimap.create();
    for (Document document : documents)
    {
        final Collection<Object> partitions = document.getField(partitionIdFieldName);
        if (partitions == null)
        {
            // No partition information on this document: nothing to index, and
            // iterating a null collection would throw a NullPointerException.
            continue;
        }
        for (Object partition : partitions)
        {
            index.put(partition, document);
        }
    }
    return ImmutableSetMultimap.copyOf(index);
}
/**
 * Appends one {@code SimpleOrderedMap} per cluster to {@code parent}. Each entry
 * holds the cluster's phrases under "labels" (cut to at most {@code maxLabels}),
 * the "solrId" field of its documents under "docs" and, when
 * {@code outputSubClusters} is set, the recursively converted subclusters under
 * "clusters".
 *
 * @param outputClusters the clusters to convert
 * @param parent the list receiving the converted clusters
 * @param outputSubClusters when {@code true}, "docs" is built from
 *            {@code getDocuments()} and subclusters are emitted; when
 *            {@code false}, "docs" is built from {@code getAllDocuments()}
 * @param maxLabels maximum number of labels emitted per cluster
 */
private void clustersToNamedList(List<Cluster> outputClusters, List parent,
    boolean outputSubClusters, int maxLabels) {
  for (Cluster current : outputClusters) {
    NamedList entry = new SimpleOrderedMap();
    parent.add(entry);

    List<String> phrases = current.getPhrases();
    if (phrases.size() > maxLabels) {
      phrases = phrases.subList(0, maxLabels);
    }
    entry.add("labels", phrases);

    List<Document> members;
    if (outputSubClusters) {
      members = current.getDocuments();
    } else {
      members = current.getAllDocuments();
    }

    List ids = new ArrayList();
    entry.add("docs", ids);
    for (Document member : members) {
      ids.add(member.getField("solrId"));
    }

    if (!outputSubClusters) {
      continue;
    }
    List children = new ArrayList();
    entry.add("clusters", children);
    clustersToNamedList(current.getSubclusters(), children, outputSubClusters,
        maxLabels);
  }
}
/**
 * Groups {@code documents} by the value of the field named by {@code fieldName}.
 * When the field holds a {@link Collection}, the document is added once for each
 * element; otherwise it is added under the field value itself (including
 * {@code null}). The resulting clusters are sorted with
 * {@code Cluster.BY_REVERSED_SIZE_AND_LABEL_COMPARATOR} and then handed, together
 * with the documents, to {@code Cluster.appendOtherTopics}.
 */
@Override
public void process() throws ProcessingException
{
    final Map<Object, Cluster> byValue = Maps.newHashMap();
    for (final Document doc : documents)
    {
        final Object fieldValue = doc.getField(fieldName);
        if (!(fieldValue instanceof Collection<?>))
        {
            // Single-valued (or missing) field: one cluster assignment.
            addToCluster(byValue, fieldValue, doc);
            continue;
        }

        for (final Object element : (Collection<?>) fieldValue)
        {
            addToCluster(byValue, element, doc);
        }
    }

    clusters = Lists.newArrayList(byValue.values());
    Collections.sort(clusters, Cluster.BY_REVERSED_SIZE_AND_LABEL_COMPARATOR);
    Cluster.appendOtherTopics(documents, clusters);
}
cluster.add("docs", docList); for (Document doc : docs) { docList.add(doc.getField(SOLR_DOCUMENT_ID));
final String fieldValue = doc.getField(fieldName);
// Copy six document fields (_index, _type, _id, _title, _summary, _url) into the
// builder, each under the same name it has on the document.
builder.field("_index", document.getField("_index")); builder.field("_type", document.getField("_type")); builder.field("_id", document.getField("_id")); builder.field("_title",document.getField("_title")); builder.field("_summary",document.getField("_summary")); builder.field("_url",document.getField("_url"));