entry = new org.apache.lucene.search.highlight.Highlighter(formatter, encoder, queryScorer); entry.setTextFragmenter(fragmenter); entry.setMaxDocCharsToAnalyze(Integer.MAX_VALUE); TextFragment[] bestTextFragments = entry.getBestTextFragments(tokenStream, text, false, numberOfFragments); for (TextFragment bestTextFragment : bestTextFragments) { if (bestTextFragment != null && bestTextFragment.getScore() > 0) {
MySuggestionFormatter formatter = new MySuggestionFormatter(theText, suggestions); Scorer scorer = new QueryScorer(textQuery); Highlighter highlighter = new Highlighter(formatter, scorer); Analyzer analyzer = em.getSearchFactory().getAnalyzer(ResourceTable.class); highlighter.getBestFragments(analyzer.tokenStream("myContentTextPhonetic", nextValue), nextValue, 10); highlighter.getBestFragments(analyzer.tokenStream("myContentTextNGram", nextValue), nextValue, 10); highlighter.getBestFragments(analyzer.tokenStream("myContentTextEdgeNGram", nextValue), nextValue, 10);
/**
 * Returns the stored value of {@code field} with query matches wrapped in
 * {@code <font>} tags, or the raw field value when nothing matched.
 *
 * @param query    the query whose terms should be highlighted
 * @param doc      the document supplying the field text
 * @param field    name of the stored field to highlight
 * @param analyzer analyzer used to re-tokenize the field text
 * @return highlighted text, the raw field value if no fragment scored, or
 *         {@code null} when highlighting failed (original error contract kept)
 */
public String toHighlighter(Query query, Document doc, String field, Analyzer analyzer) {
    // Read the stored value once instead of the original three doc.get(field) calls.
    String fieldText = doc.get(field);
    try {
        SimpleHTMLFormatter simpleHtmlFormatter = new SimpleHTMLFormatter("<font>", "</font>");
        Highlighter highlighter = new Highlighter(simpleHtmlFormatter, new QueryScorer(query));
        // NOTE: the token stream is built for the literal field name "text",
        // not for `field` — kept as-is to preserve existing behavior; verify intent.
        TokenStream tokenStream = analyzer.tokenStream("text", new StringReader(fieldText));
        String highlighted = highlighter.getBestFragment(tokenStream, fieldText);
        return highlighted == null ? fieldText : highlighted;
    } catch (IOException e) {
        // NOTE(review): prefer a logger over printStackTrace; kept to preserve behavior.
        e.printStackTrace();
        return null;
    } catch (InvalidTokenOffsetsException e) {
        e.printStackTrace();
        return null;
    }
}
/**
 * Configures the shared highlighter instance: a default (no-op escaping)
 * encoder and the configured analysis character limit, and records whether
 * token streams should come from stored term vectors.
 *
 * Relies on the enclosing class's {@code highlighter} and
 * {@code maxDocCharsToAnalyze} fields, which are initialized elsewhere.
 */
StandardHLImpl(boolean termVecs) {
    highlighter.setEncoder(new DefaultEncoder());
    highlighter.setMaxDocCharsToAnalyze(maxDocCharsToAnalyze);
    // true -> reuse indexed term vectors instead of re-analyzing stored text
    this.termVecs = termVecs;
}
protected final List<String> getBestFragments( Query query, Formatter formatter, TokenStream tokenStream, String text, int maxNumFragments ) throws IOException { Highlighter highlighter = new Highlighter( formatter, new CleaningEncoder(), new QueryScorer( query ) ); highlighter.setTextFragmenter( new OneLineFragmenter() ); tokenStream.reset(); maxNumFragments = Math.max( 1, maxNumFragments ); // sanity check TextFragment[] frag = highlighter.getBestTextFragments( tokenStream, text, false, maxNumFragments ); // Get text ArrayList<String> fragTexts = new ArrayList<String>( maxNumFragments ); for ( int i = 0; i < frag.length; i++ ) { if ( ( frag[i] != null ) && ( frag[i].getScore() > 0 ) ) { fragTexts.add( frag[i].toString() ); } } return fragTexts; }
QueryScorer scorer = new QueryScorer(query);//计算查询结果最高的得分 Fragmenter fragmenter = new SimpleSpanFragmenter(scorer);//根据得分算出一个片段 Highlighter highlighter = new Highlighter(simpleHTMLFormatter,scorer); highlighter.setTextFragmenter(fragmenter);//设置显示高亮的片段 if(tcontent != null){ TokenStream tokenStream = analyzer.tokenStream("tcontent", new StringReader(tcontent)); String summary = highlighter.getBestFragment(tokenStream, tcontent); System.out.println(summary);
/**
 * Return a phrase Highlighter appropriate for this field.
 *
 * @param query       The current Query
 * @param fieldName   The name of the field
 * @param request     The current SolrQueryRequest
 * @param tokenStream document text CachingTokenStream
 * @return a Highlighter using this field's formatter, span-aware scorer, and fragmenter
 * @throws IOException if the span query scorer cannot be built
 */
protected Highlighter getPhraseHighlighter(Query query, String fieldName, SolrQueryRequest request, CachingTokenFilter tokenStream) throws IOException {
    SolrParams params = request.getParams();
    // Assign directly — the former "= null" initialization was dead code.
    Highlighter highlighter = new Highlighter(
            getFormatter(fieldName, params),
            getSpanQueryScorer(query, fieldName, tokenStream, request));
    highlighter.setTextFragmenter(getFragmenter(fieldName, params));
    return highlighter;
}
private HQuery(Query query) { checkNotNull(query, "The rewritten query to highlight must be provided"); this.highlighter = new Highlighter(formatter, new QueryScorer(query)); }
Highlighter highlighter = new Highlighter(formatter, scorer); Fragmenter fragmenter = new SimpleFragmenter(len); highlighter.setTextFragmenter(fragmenter); text = highlighter.getBestFragments(tokenStream, pText, maxFragments, FRAGMENT_SEPARATOR); } catch (InvalidTokenOffsetsException e) { logger.debug("error : {} / pText:{} / query:{} / result:{}", e.getMessage(), pText, query);
/**
 * Builds a highlighter for the given property using the configured formatter,
 * encoder, fragmenter, and analysis byte limit.
 *
 * @param propertyName property whose scorer should drive fragment selection
 * @return a fully configured Highlighter
 * @throws SearchEngineException if the scorer cannot be created
 */
protected Highlighter createHighlighter(String propertyName) throws SearchEngineException {
    Highlighter highlighter = new Highlighter(
            highlighterSettings.getFormatter(),
            highlighterSettings.getEncoder(),
            createScorer(propertyName));
    highlighter.setTextFragmenter(highlighterSettings.getFragmenter());
    // -1 means "no per-call override": fall back to the settings' limit.
    highlighter.setMaxDocBytesToAnalyze(
            maxBytesToAnalyze == -1 ? highlighterSettings.getMaxBytesToAnalyze() : maxBytesToAnalyze);
    return highlighter;
}
/**
 * Highlights chosen terms in a text, extracting the single most relevant
 * section. The document text is analysed in chunks to record hit statistics
 * across the document; the fragment with the highest score is returned.
 *
 * @param tokenStream a stream of tokens identified in the text parameter,
 *        including offset information, typically produced by an analyzer
 *        re-parsing the document's text
 * @param text text to highlight terms in
 * @return highlighted text fragment, or null if no terms were found
 * @throws InvalidTokenOffsetsException thrown if any token's endOffset
 *         exceeds the provided text's length
 */
public final String getBestFragment(TokenStream tokenStream, String text) throws IOException, InvalidTokenOffsetsException {
    // Delegate to the multi-fragment variant and keep only the top hit.
    String[] best = getBestFragments(tokenStream, text, 1);
    return best.length > 0 ? best[0] : null;
}
@Override public Multimap<String, String> getFragments(Document doc) { final Multimap<String, String> fragments = ArrayListMultimap.create(); for (Map.Entry<String, Integer> entry : fields.entrySet()) { final String field = entry.getKey(); final Integer maxNumFragments = entry.getValue(); final String text = doc.get(field); if (text != null) { try { highlighter.setTextFragmenter(maxNumFragments > 0 ? new SimpleFragmenter() : new NullFragmenter()); String[] fr = highlighter.getBestFragments(analyzer, field, text, maxNumFragments); if (fr != null && fr.length > 0) { fragments.putAll(field, Arrays.asList(fr)); } } catch (IOException e) { } catch (InvalidTokenOffsetsException e) { } } } // TODO: fix exceptions. return fragments; }
/**
 * Rebuilds the cached highlighter from the current {@code formatter} and
 * {@code query} fields; clears it when no formatter is configured.
 */
private void resetHighlighter() {
    if (formatter != null) {
        this.highlighter = new Highlighter(formatter, new QueryScorer(query));
        // DefaultEncoder performs no HTML/entity escaping of fragment text.
        this.highlighter.setEncoder(new DefaultEncoder());
    } else {
        // No formatter -> highlighting disabled until one is set.
        this.highlighter = null;
    }
}
}
/** * Highlights chosen terms in a text, extracting the most relevant sections. The document text is analysed in chunks to record * hit statistics across the document. After accumulating stats, the fragments with the highest scores are returned as an * array of strings in order of score (contiguous fragments are merged into one in their original order to improve * readability) * * @param text * text to highlight terms in * @param maxNumFragments * the maximum number of fragments. * * @return highlighted text fragments (between 0 and maxNumFragments number of fragments) * @throws InvalidTokenOffsetsException * thrown if any token's endOffset exceeds the provided text's length */ public final String[] getBestFragments(TokenStream tokenStream, String text, int maxNumFragments) throws IOException, InvalidTokenOffsetsException { maxNumFragments = Math.max(1, maxNumFragments); // sanity check TextFragment[] frag = getBestTextFragments(tokenStream, text, true, maxNumFragments); // Get text ArrayList<String> fragTexts = new ArrayList<String>(); for (int i = 0; i < frag.length; i++) { if ((frag[i] != null) && (frag[i].getScore() > 0)) { fragTexts.add(frag[i].toString()); } } return fragTexts.toArray(new String[0]); }
highlighter.setTextFragmenter( new SimpleSpanFragmenter(scorer) ); analyzer2); String fragment = highlighter.getBestFragment(stream, title);
SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter(); Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query)); for (int i = 0; i < hits.length; i++) { int docId = hits[i].doc; Document d = searcher.doc(docId); String text = doc.get("contents"); String bestFrag = highlighter.getBestFragment(analyzer, "contents", text); //output, however you like.
Highlighter.DEFAULT_MAX_CHARS_TO_ANALYZE); if (maxCharsToAnalyze < 0) { highlighter.setMaxDocCharsToAnalyze(docTexts[j].length()); } else { highlighter.setMaxDocCharsToAnalyze(maxCharsToAnalyze); TextFragment[] bestTextFragments = highlighter.getBestTextFragments(tstream, docTexts[j], mergeContiguousFragments, numFragments); for (int k = 0; k < bestTextFragments.length; k++) { if ((bestTextFragments[k] != null) && (bestTextFragments[k].getScore() > 0)) {
/**
 * Highlights every stored field of every hit in {@code hits} against query
 * {@code q}, re-scoring the shared {@code highlighter} for this query.
 *
 * Token streams come either from indexed term vectors (when {@code termVecs})
 * or from re-analysis of the stored value. Fragment counts are accumulated
 * into {@code preventOptimizeAway} so the JIT cannot elide the work — this
 * reads like a benchmark harness; confirm before reusing elsewhere.
 *
 * Relies on enclosing-class fields: {@code highlighter}, {@code analyzer},
 * {@code termVecs}, {@code hlFields}, {@code maxDocCharsToAnalyze},
 * {@code maxFrags}, {@code preventOptimizeAway}.
 */
@Override
public void withTopDocs(IndexSearcher searcher, Query q, TopDocs hits) throws Exception {
    IndexReader reader = searcher.getIndexReader();
    highlighter.setFragmentScorer(new QueryScorer(q));
    // highlighter.setTextFragmenter(); unfortunately no sentence mechanism, not even regex. Default here is trivial
    for (ScoreDoc scoreDoc : docIdOrder(hits.scoreDocs)) {
        Document document = reader.document(scoreDoc.doc, hlFields);
        // Term vectors are fetched once per doc and reused for every field.
        Fields tvFields = termVecs ? reader.getTermVectors(scoreDoc.doc) : null;
        for (IndexableField indexableField : document) {
            TokenStream tokenStream;
            if (termVecs) {
                tokenStream = TokenSources.getTokenStream(indexableField.name(), tvFields, indexableField.stringValue(), analyzer, maxDocCharsToAnalyze);
            } else {
                tokenStream = analyzer.tokenStream(indexableField.name(), indexableField.stringValue());
            }
            // will close TokenStream:
            String[] fragments = highlighter.getBestFragments(tokenStream, indexableField.stringValue(), maxFrags);
            preventOptimizeAway = fragments.length;
        }
    }
}
}
protected final List<String> getBestFragments( Query query, Formatter formatter, TokenStream tokenStream, String text, int maxNumFragments ) throws IOException { Highlighter highlighter = new Highlighter( formatter, new CleaningEncoder(), new QueryScorer( query ) ); highlighter.setTextFragmenter( new OneLineFragmenter() ); maxNumFragments = Math.max( 1, maxNumFragments ); // sanity check TextFragment[] frag; // Get text ArrayList<String> fragTexts = new ArrayList<String>( maxNumFragments ); try { frag = highlighter.getBestTextFragments( tokenStream, text, false, maxNumFragments ); for ( int i = 0; i < frag.length; i++ ) { if ( ( frag[i] != null ) && ( frag[i].getScore() > 0 ) ) { fragTexts.add( frag[i].toString() ); } } } catch ( InvalidTokenOffsetsException e ) { // empty? } return fragTexts; }
Highlighter highlighter = new Highlighter(FORMATTER, scorer); highlighter.setTextFragmenter(new NullFragmenter()); highlightedDescMap.put(webSearchable, highlighter.getBestFragment( tokenStream, highlightString));