/**
 * Stores the given language code in this document's {@link #LANGUAGE} field.
 *
 * @param language the language code to store under {@link #LANGUAGE}
 * @return the value returned by {@code setField}, i.e. this document, so calls can be chained
 */
public Document setLanguage(LanguageCode language)
{
    return setField(LANGUAGE, language);
}
/** * Creates a document with the provided <code>title</code>, <code>summary</code>, * <code>contentUrl</code> and <code>language</code>. */ public Document(String title, String summary, String contentUrl, LanguageCode language) { setField(TITLE, title); setField(SUMMARY, summary); if (StringUtils.isNotBlank(contentUrl)) { setField(CONTENT_URL, contentUrl); } if (language != null) { setField(LANGUAGE, language); } }
/**
 * Stores the given URL in this document's {@link #CONTENT_URL} field.
 * <p>
 * The setter is annotated as an optional element named <code>url</code>.
 *
 * @param contentUrl the content URL to store under {@link #CONTENT_URL}
 * @return the value returned by {@code setField}, i.e. this document, so calls can be chained
 */
@Element(name = "url", required = false)
public Document setContentUrl(String contentUrl)
{
    return setField(CONTENT_URL, contentUrl);
}
/**
 * Stores the given list in this document's {@link #SOURCES} field.
 * <p>
 * The setter is annotated as an optional element list whose entries are named
 * <code>source</code>.
 *
 * @param sources the list of sources to store under {@link #SOURCES}
 * @return the value returned by {@code setField}, i.e. this document, so calls can be chained
 */
@ElementList(entry = "source", required = false)
public Document setSources(List<String> sources)
{
    return setField(SOURCES, sources);
}
/**
 * Stores the given value in this document's {@link #SCORE} field.
 * <p>
 * The setter is annotated as an optional attribute named <code>score</code>.
 *
 * @param score the value to store under {@link #SCORE}
 * @return the value returned by {@code setField}, i.e. this document, so calls can be chained
 */
@Attribute(name = "score", required = false)
public Document setScore(Double score)
{
    return setField(SCORE, score);
}
/**
 * Stores the given text in this document's {@link #TITLE} field.
 * <p>
 * The setter is annotated as an optional element.
 *
 * @param title the title to store under {@link #TITLE}
 * @return the value returned by {@code setField}, i.e. this document, so calls can be chained
 */
@Element(required = false)
public Document setTitle(String title)
{
    return setField(TITLE, title);
}
/**
 * Stores the given text in this document's {@link #SUMMARY} field.
 * <p>
 * The setter is annotated as an optional element named <code>snippet</code>.
 *
 * @param summary the summary to store under {@link #SUMMARY}
 * @return the value returned by {@code setField}, i.e. this document, so calls can be chained
 */
@Element(name = "snippet", required = false)
public Document setSummary(String summary)
{
    return setField(SUMMARY, summary);
}
/**
 * Copies the web pages section of a search response into <code>ser</code>.
 * <p>
 * When the response has no web pages section, the total under
 * {@link SearchEngineResponse#RESULTS_TOTAL_KEY} is set to <code>0</code> and no
 * documents are added. Otherwise the estimated total is recorded and one
 * {@link Document} is added per entry of the section's <code>value</code> list.
 * A missing (<code>null</code>) <code>value</code> list is treated as empty.
 *
 * @param response the response to read; cast to <code>SearchResponse</code>
 * @param ser the search engine response receiving metadata and documents
 */
protected void handleResponse(BingResponse response, SearchEngineResponse ser) {
  SearchResponse searchResponse = (SearchResponse) response;

  if (searchResponse.webPages == null) {
    ser.metadata.put(SearchEngineResponse.RESULTS_TOTAL_KEY, 0);
    return;
  }

  ser.metadata.put(SearchEngineResponse.RESULTS_TOTAL_KEY,
      searchResponse.webPages.totalEstimatedMatches);

  // The result list may be absent from the response even when the section
  // itself is present; iterating a null list would throw.
  if (searchResponse.webPages.value == null) {
    return;
  }

  for (SearchResponse.WebPages.Result r : searchResponse.webPages.value) {
    Document doc = new Document(r.name, r.snippet, r.displayUrl);
    // NOTE(review): the guard tests displayUrl while the stored value is url;
    // kept as in the original, confirm this is intended.
    if (r.displayUrl != null) {
      doc.setField(Document.CLICK_URL, r.url);
    }
    ser.results.add(doc);
  }
}
}
/**
 * Unescapes HTML entities in a given set of <code>fields</code> of all documents
 * in the provided <code>response</code>, optionally removing highlight markup
 * first.
 * <p>
 * Only fields whose current value is not blank are processed; the cleaned value
 * is written back to the same field.
 *
 * @param response the search engine response to clean
 * @param keepHighlights set to <code>true</code> to keep query terms highlights;
 *        when <code>false</code>, everything matched by
 *        <code>HIGHLIGHTS_PATTERN</code> is removed before unescaping
 * @param fields names of fields to clean
 */
protected static void clean(SearchEngineResponse response, boolean keepHighlights,
    String... fields)
{
    for (Document document : response.results)
    {
        for (String field : fields)
        {
            final String originalField = document.getField(field);
            if (StringUtils.isBlank(originalField))
            {
                continue;
            }

            String cleanedField = originalField;
            if (!keepHighlights)
            {
                cleanedField = HIGHLIGHTS_PATTERN.matcher(cleanedField).replaceAll("");
            }

            // Unescape, not escape: the field holds HTML-encoded text and
            // escaping it again would double-encode every entity.
            cleanedField = StringEscapeUtils.unescapeHtml4(cleanedField);
            document.setField(field, cleanedField);
        }
    }
}
/**
 * Copies the articles of a news response into <code>ser</code>.
 * <p>
 * The estimated total is always recorded under
 * {@link SearchEngineResponse#RESULTS_TOTAL_KEY}. Articles are then processed
 * from a queue seeded with the response's <code>value</code> list; each
 * article's clustered articles, when present, are appended to the end of that
 * queue so they become documents as well. For every article a {@link Document}
 * is created from its name, description and URL, with the thumbnail URL and the
 * provider names attached when available.
 *
 * @param response the response to read; cast to <code>NewsResponse</code>
 * @param ser the search engine response receiving metadata and documents
 */
@Override
protected void handleResponse(BingResponse response, SearchEngineResponse ser) {
  final NewsResponse news = (NewsResponse) response;
  ser.metadata.put(SearchEngineResponse.RESULTS_TOTAL_KEY, news.totalEstimatedMatches);

  if (news.value == null) {
    return;
  }

  final ArrayDeque<NewsResponse.NewsArticle> pending = new ArrayDeque<>(news.value);
  NewsResponse.NewsArticle article;
  // ArrayDeque never holds null elements, so a null poll result means "empty".
  while ((article = pending.pollFirst()) != null) {
    if (article.clusteredArticles != null) {
      pending.addAll(article.clusteredArticles);
    }

    final Document document = new Document(article.name, article.description, article.url);

    final boolean hasThumbnail = article.image != null && article.image.thumbnail != null;
    if (hasThumbnail) {
      document.setField(Document.THUMBNAIL_URL, article.image.thumbnail.contentUrl);
    }

    if (article.provider != null) {
      final ArrayList<String> providerNames = new ArrayList<>();
      for (NewsResponse.NewsArticle.Organization organization : article.provider) {
        providerNames.add(organization.name);
      }
      document.setField(Document.SOURCES, providerNames);
    }

    ser.results.add(document);
  }
}
}
// Store luceneDoc on the document under LUCENE_DOCUMENT_FIELD.
doc.setField(LUCENE_DOCUMENT_FIELD, luceneDoc);
// Register removeLuceneDocument as a serialization listener on the document.
// NOTE(review): presumably it drops the field set above before the document
// is serialized; confirm against the listener's definition.
doc.addSerializationListener(removeLuceneDocument);