/**
 * Converts a Bing web search response into documents and metadata stored in
 * <code>ser</code>. When the response carries no <code>webPages</code> section, only
 * a zero total is recorded and no documents are added.
 *
 * @param response the response to convert; cast to {@link SearchResponse}
 * @param ser the search engine response receiving documents and the results total
 */
protected void handleResponse(BingResponse response, SearchEngineResponse ser) {
    final SearchResponse webResponse = (SearchResponse) response;

    // No web pages section at all: report an empty result set.
    if (webResponse.webPages == null) {
        ser.metadata.put(SearchEngineResponse.RESULTS_TOTAL_KEY, 0);
        return;
    }

    ser.metadata.put(SearchEngineResponse.RESULTS_TOTAL_KEY,
        webResponse.webPages.totalEstimatedMatches);

    for (SearchResponse.WebPages.Result hit : webResponse.webPages.value) {
        final Document converted = new Document(hit.name, hit.snippet, hit.displayUrl);
        // The click URL is attached only for hits that also carry a display URL.
        if (hit.displayUrl != null) {
            converted.setField(Document.CLICK_URL, hit.url);
        }
        ser.results.add(converted);
    }
}
}
/**
 * Returns the value stored in this document's {@link #LANGUAGE} field.
 *
 * @return the value of the {@link #LANGUAGE} field, as returned by
 *         <code>getField</code>
 */
public LanguageCode getLanguage() {
    final LanguageCode language = getField(LANGUAGE);
    return language;
}
/**
 * Sets the language of every document in <code>response</code> to
 * {@link LanguageCode#ENGLISH}.
 *
 * @param response the fetched response whose documents are updated in place
 */
@Override
protected void afterFetch(SearchEngineResponse response) {
    for (final Document fetched : response.results) {
        fetched.setLanguage(LanguageCode.ENGLISH);
    }
}
/**
 * Unescapes HTML entities in a given set of <code>fields</code> of all documents in
 * the provided <code>response</code> and, unless <code>keepHighlights</code> is
 * <code>true</code>, removes everything matched by <code>HIGHLIGHTS_PATTERN</code>
 * from those fields first. Fields that are missing or blank are left untouched.
 *
 * @param response the search engine response to clean
 * @param keepHighlights set to <code>true</code> to keep query terms highlights
 * @param fields names of fields to clean
 */
protected static void clean(SearchEngineResponse response, boolean keepHighlights,
    String... fields) {
    for (Document document : response.results) {
        for (String field : fields) {
            final String originalField = document.getField(field);
            if (StringUtils.isBlank(originalField)) {
                // Nothing to clean for missing or whitespace-only values.
                continue;
            }

            String cleanedField = originalField;
            if (!keepHighlights) {
                final Matcher matcher = HIGHLIGHTS_PATTERN.matcher(cleanedField);
                cleanedField = matcher.replaceAll("");
            }

            // Decode entities (e.g. "&amp;" -> "&"). The previous escapeHtml4 call did
            // the opposite of what this method documents and double-escaped content.
            cleanedField = StringEscapeUtils.unescapeHtml4(cleanedField);
            document.setField(field, cleanedField);
        }
    }
}
String summary =""; String url = null; Document doc = new Document(); doc.setField("_url", url); doc.setLanguage(LanguageCode.valueOf(req.Language.trim().toUpperCase())); doc.setTitle(title); doc.setSummary(summary); doc.setField("_index", hit.index()); doc.setField("_type", hit.type()); doc.setField("_id", hit.id()); doc.setField("_title",title); doc.setField("_summary",summary); doc.setField("_url",url); builder.field("_index", document.getField("_index")); builder.field("_type", document.getField("_type")); builder.field("_id", document.getField("_id")); builder.field("_title",document.getField("_title")); builder.field("_summary",document.getField("_summary")); builder.field("_url",document.getField("_url"));
Document carrotDocument = new Document(getConcatenated(sdoc, titleFieldSpec), snippet, ObjectUtils.toString(sdoc.getFieldValue(urlField), "")); carrotDocument.setField(SOLR_DOCUMENT_ID, sdoc.getFieldValue(idFieldName)); carrotDocument.setLanguage(carrot2Language); break; carrotDocument.setField(entry.getValue(), sdoc.getFieldValue(entry.getKey()));
final Document doc = new Document(); final org.apache.lucene.document.Document luceneDoc = searcher .doc(scoreDoc.doc); doc.setScore((double) scoreDoc.score); doc.setField(LUCENE_DOCUMENT_FIELD, luceneDoc); doc.addSerializationListener(removeLuceneDocument);
final Document document = new Document(); document.setField(Document.CONTENT_URL, split[1]); document.setField(Document.TITLE, split[2]); if (split.length > 3) document.setField(Document.SUMMARY, split[3]); .setField( Document.PARTITIONS, ImmutableList allDocuments.addAll(docList); Document.assignDocumentIds(allDocuments);
/**
 * Stores <code>language</code> in this document's {@link #LANGUAGE} field.
 *
 * @param language the language to set
 * @return the document returned by <code>setField</code>, for call chaining
 */
public Document setLanguage(LanguageCode language) {
    final Document updated = setField(LANGUAGE, language);
    return updated;
}
/**
 * Produces a <strong>shallow</strong> copy of this document: the identifier is
 * copied and all entries of the fields map are put into the copy's own map, but the
 * field values themselves are shared with the original.
 *
 * @return a new document with the same identifier and field entries
 */
@Override
public Document clone() {
    final Document copy = new Document();
    copy.fields.putAll(this.fields);
    copy.id = this.id;
    return copy;
}
/** * Creates a {@link ProcessingResult} with the provided <code>attributes</code>. * Assigns unique document identifiers if documents are present in the * <code>attributes</code> map (under the key {@link AttributeNames#DOCUMENTS}). */ @SuppressWarnings("unchecked") ProcessingResult(Map<String, Object> attributes) { this.attributes = attributes; // Replace a modifiable collection of documents with an unmodifiable one final List<Document> documents = (List<Document>) attributes .get(AttributeNames.DOCUMENTS); if (documents != null) { Document.assignDocumentIds(documents); attributes.put(AttributeNames.DOCUMENTS, Collections.unmodifiableList(documents)); } // Replace a modifiable collection of clusters with an unmodifiable one final List<Cluster> clusters = (List<Cluster>) attributes .get(AttributeNames.CLUSTERS); if (clusters != null) { Cluster.assignClusterIds(clusters); attributes.put(AttributeNames.CLUSTERS, Collections.unmodifiableList(clusters)); } // Store a reference to attributes as an unmodifiable map this.attributesView = Collections.unmodifiableMap(attributes); }
/** * Creates a document with the provided <code>title</code>, <code>summary</code>, * <code>contentUrl</code> and <code>language</code>. */ public Document(String title, String summary, String contentUrl, LanguageCode language) { setField(TITLE, title); setField(SUMMARY, summary); if (StringUtils.isNotBlank(contentUrl)) { setField(CONTENT_URL, contentUrl); } if (language != null) { setField(LANGUAGE, language); } }
/**
 * Adds a document for the element that has just ended, provided a PMID was
 * collected for it; otherwise logs a warning and adds nothing.
 */
@Override
public void afterElement(String localName, String path, String text) {
    if (pmid == null) {
        LoggerFactory.getLogger(PubMedContentHandler.class).warn("No PMID on a <PubmedArticle>?");
        return;
    }

    final Document article = new Document(
        title,
        body.toString(),
        "http://www.ncbi.nlm.nih.gov/pubmed/" + pmid,
        null,
        pmid);
    response.results.add(article);
}
});
/**
 * Returns the first entry of the list stored in the document's
 * {@link Document#PARTITIONS} field.
 *
 * @param document the document to read the partitions from
 * @return the element at index 0 of the partitions list
 */
@SuppressWarnings("unchecked")
protected static String getTopic(Document document) {
    final List<String> partitions = (List<String>) document.getField(Document.PARTITIONS);
    return partitions.get(0);
}
/**
 * Converts a Bing news response into documents and metadata stored in
 * <code>ser</code>. Articles listed under another article's
 * <code>clusteredArticles</code> are queued and converted as well.
 *
 * @param response the response to convert; cast to {@link NewsResponse}
 * @param ser the search engine response receiving documents and the results total
 */
@Override
protected void handleResponse(BingResponse response, SearchEngineResponse ser) {
    final NewsResponse news = (NewsResponse) response;
    ser.metadata.put(SearchEngineResponse.RESULTS_TOTAL_KEY, news.totalEstimatedMatches);

    if (news.value == null) {
        return;
    }

    // Work queue: nested (clustered) articles are appended while draining it.
    final ArrayDeque<NewsResponse.NewsArticle> pending = new ArrayDeque<>(news.value);
    NewsResponse.NewsArticle article;
    while ((article = pending.pollFirst()) != null) {
        if (article.clusteredArticles != null) {
            pending.addAll(article.clusteredArticles);
        }

        final Document converted = new Document(article.name, article.description, article.url);

        final boolean hasThumbnail = article.image != null && article.image.thumbnail != null;
        if (hasThumbnail) {
            converted.setField(Document.THUMBNAIL_URL, article.image.thumbnail.contentUrl);
        }

        if (article.provider != null) {
            final ArrayList<String> providerNames = new ArrayList<>();
            for (NewsResponse.NewsArticle.Organization organization : article.provider) {
                providerNames.add(organization.name);
            }
            converted.setField(Document.SOURCES, providerNames);
        }

        ser.results.add(converted);
    }
}
}
/**
 * Stores <code>title</code> in this document's {@link #TITLE} field.
 *
 * @param title title to set
 * @return the document returned by <code>setField</code>, for call chaining
 */
@Element(required = false)
public Document setTitle(String title) {
    final Document updated = setField(TITLE, title);
    return updated;
}
/**
 * Loads the documents for <code>topicId</code> from <code>data</code> into
 * <code>documents</code> and sets the language of each of them to
 * {@link LanguageCode#ENGLISH}.
 *
 * @param data the test collection to read documents from
 * @param topicId the topic to fetch documents for
 * @param requestedResults passed through to <code>getDocumentsForTopic</code>
 */
protected void processInternal(FubTestCollection data, int topicId, int requestedResults)
    throws ProcessingException {
    this.documents = data.getDocumentsForTopic(
        topicId,
        requestedResults,
        minTopicSize,
        includeDocumentsWithoutTopic);

    for (final Document fetched : documents) {
        fetched.setLanguage(LanguageCode.ENGLISH);
    }
}
}
/**
 * Reads the <code>fieldName</code> field of <code>document</code> and tries to add its
 * value to <code>unique</code>.
 *
 * @param document the document to test
 * @return <code>false</code> when the field value is <code>null</code>, otherwise the
 *         result of <code>unique.add</code> for that value
 */
public boolean apply(Document document) {
    final Object value = document.getField(fieldName);
    return value != null && unique.add(value);
}
}