entry = new org.apache.lucene.search.highlight.Highlighter(formatter, encoder, queryScorer); entry.setTextFragmenter(fragmenter); entry.setMaxDocCharsToAnalyze(Integer.MAX_VALUE); TextFragment[] bestTextFragments = entry.getBestTextFragments(tokenStream, text, false, numberOfFragments); for (TextFragment bestTextFragment : bestTextFragments) { if (bestTextFragment != null && bestTextFragment.getScore() > 0) {
MySuggestionFormatter formatter = new MySuggestionFormatter(theText, suggestions); Scorer scorer = new QueryScorer(textQuery); Highlighter highlighter = new Highlighter(formatter, scorer); Analyzer analyzer = em.getSearchFactory().getAnalyzer(ResourceTable.class); highlighter.getBestFragments(analyzer.tokenStream("myContentTextPhonetic", nextValue), nextValue, 10); highlighter.getBestFragments(analyzer.tokenStream("myContentTextNGram", nextValue), nextValue, 10); highlighter.getBestFragments(analyzer.tokenStream("myContentTextEdgeNGram", nextValue), nextValue, 10);
/**
 * Returns the stored value of {@code field} with query matches wrapped in
 * {@code <font>} tags, or the raw field value when nothing matched.
 *
 * @param query    the query whose terms should be highlighted
 * @param doc      the document supplying the field text
 * @param field    name of the stored field to highlight
 * @param analyzer analyzer used to re-tokenize the field text
 * @return highlighted text, the raw field value if no fragment scored, or
 *         {@code null} when highlighting failed (original error contract kept)
 */
public String toHighlighter(Query query, Document doc, String field, Analyzer analyzer) {
    // Read the stored value once instead of the original three doc.get(field) calls.
    String fieldText = doc.get(field);
    try {
        SimpleHTMLFormatter simpleHtmlFormatter = new SimpleHTMLFormatter("<font>", "</font>");
        Highlighter highlighter = new Highlighter(simpleHtmlFormatter, new QueryScorer(query));
        // NOTE: the token stream is built for the literal field name "text",
        // not for `field` — kept as-is to preserve existing behavior; verify intent.
        TokenStream tokenStream = analyzer.tokenStream("text", new StringReader(fieldText));
        String highlighted = highlighter.getBestFragment(tokenStream, fieldText);
        return highlighted == null ? fieldText : highlighted;
    } catch (IOException e) {
        // NOTE(review): prefer a logger over printStackTrace; kept to preserve behavior.
        e.printStackTrace();
        return null;
    } catch (InvalidTokenOffsetsException e) {
        e.printStackTrace();
        return null;
    }
}
/**
 * Configures the shared highlighter instance: a default (no-op escaping)
 * encoder and the configured analysis character limit, and records whether
 * token streams should come from stored term vectors.
 *
 * Relies on the enclosing class's {@code highlighter} and
 * {@code maxDocCharsToAnalyze} fields, which are initialized elsewhere.
 */
StandardHLImpl(boolean termVecs) {
    highlighter.setEncoder(new DefaultEncoder());
    highlighter.setMaxDocCharsToAnalyze(maxDocCharsToAnalyze);
    // true -> reuse indexed term vectors instead of re-analyzing stored text
    this.termVecs = termVecs;
}
protected final List<String> getBestFragments( Query query, Formatter formatter, TokenStream tokenStream, String text, int maxNumFragments ) throws IOException { Highlighter highlighter = new Highlighter( formatter, new CleaningEncoder(), new QueryScorer( query ) ); highlighter.setTextFragmenter( new OneLineFragmenter() ); tokenStream.reset(); maxNumFragments = Math.max( 1, maxNumFragments ); // sanity check TextFragment[] frag = highlighter.getBestTextFragments( tokenStream, text, false, maxNumFragments ); // Get text ArrayList<String> fragTexts = new ArrayList<String>( maxNumFragments ); for ( int i = 0; i < frag.length; i++ ) { if ( ( frag[i] != null ) && ( frag[i].getScore() > 0 ) ) { fragTexts.add( frag[i].toString() ); } } return fragTexts; }
QueryScorer scorer = new QueryScorer(query);//计算查询结果最高的得分 Fragmenter fragmenter = new SimpleSpanFragmenter(scorer);//根据得分算出一个片段 Highlighter highlighter = new Highlighter(simpleHTMLFormatter,scorer); highlighter.setTextFragmenter(fragmenter);//设置显示高亮的片段 if(tcontent != null){ TokenStream tokenStream = analyzer.tokenStream("tcontent", new StringReader(tcontent)); String summary = highlighter.getBestFragment(tokenStream, tcontent); System.out.println(summary);
/**
 * Return a phrase Highlighter appropriate for this field.
 *
 * @param query       The current Query
 * @param fieldName   The name of the field
 * @param request     The current SolrQueryRequest
 * @param tokenStream document text CachingTokenStream
 * @return a Highlighter using this field's formatter, span-aware scorer, and fragmenter
 * @throws IOException if the span query scorer cannot be built
 */
protected Highlighter getPhraseHighlighter(Query query, String fieldName, SolrQueryRequest request, CachingTokenFilter tokenStream) throws IOException {
    SolrParams params = request.getParams();
    // Assign directly — the former "= null" initialization was dead code.
    Highlighter highlighter = new Highlighter(
            getFormatter(fieldName, params),
            getSpanQueryScorer(query, fieldName, tokenStream, request));
    highlighter.setTextFragmenter(getFragmenter(fieldName, params));
    return highlighter;
}
private HQuery(Query query) { checkNotNull(query, "The rewritten query to highlight must be provided"); this.highlighter = new Highlighter(formatter, new QueryScorer(query)); }
Highlighter highlighter = new Highlighter(formatter, scorer); Fragmenter fragmenter = new SimpleFragmenter(len); highlighter.setTextFragmenter(fragmenter); text = highlighter.getBestFragments(tokenStream, pText, maxFragments, FRAGMENT_SEPARATOR); } catch (InvalidTokenOffsetsException e) { logger.debug("error : {} / pText:{} / query:{} / result:{}", e.getMessage(), pText, query);
/**
 * Builds a highlighter for the given property using the configured formatter,
 * encoder, fragmenter, and analysis byte limit.
 *
 * @param propertyName property whose scorer should drive fragment selection
 * @return a fully configured Highlighter
 * @throws SearchEngineException if the scorer cannot be created
 */
protected Highlighter createHighlighter(String propertyName) throws SearchEngineException {
    Highlighter highlighter = new Highlighter(
            highlighterSettings.getFormatter(),
            highlighterSettings.getEncoder(),
            createScorer(propertyName));
    highlighter.setTextFragmenter(highlighterSettings.getFragmenter());
    // -1 means "no per-call override": fall back to the settings' limit.
    highlighter.setMaxDocBytesToAnalyze(
            maxBytesToAnalyze == -1 ? highlighterSettings.getMaxBytesToAnalyze() : maxBytesToAnalyze);
    return highlighter;
}
/**
 * Highlights chosen terms in a text, extracting the single most relevant
 * section. The document text is analysed in chunks to record hit statistics
 * across the document; the fragment with the highest score is returned.
 *
 * @param tokenStream a stream of tokens identified in the text parameter,
 *        including offset information, typically produced by an analyzer
 *        re-parsing the document's text
 * @param text text to highlight terms in
 * @return highlighted text fragment, or null if no terms were found
 * @throws InvalidTokenOffsetsException thrown if any token's endOffset
 *         exceeds the provided text's length
 */
public final String getBestFragment(TokenStream tokenStream, String text) throws IOException, InvalidTokenOffsetsException {
    // Delegate to the multi-fragment variant and keep only the top hit.
    String[] best = getBestFragments(tokenStream, text, 1);
    return best.length > 0 ? best[0] : null;
}
@Override public Multimap<String, String> getFragments(Document doc) { final Multimap<String, String> fragments = ArrayListMultimap.create(); for (Map.Entry<String, Integer> entry : fields.entrySet()) { final String field = entry.getKey(); final Integer maxNumFragments = entry.getValue(); final String text = doc.get(field); if (text != null) { try { highlighter.setTextFragmenter(maxNumFragments > 0 ? new SimpleFragmenter() : new NullFragmenter()); String[] fr = highlighter.getBestFragments(analyzer, field, text, maxNumFragments); if (fr != null && fr.length > 0) { fragments.putAll(field, Arrays.asList(fr)); } } catch (IOException e) { } catch (InvalidTokenOffsetsException e) { } } } // TODO: fix exceptions. return fragments; }
/**
 * Rebuilds the cached highlighter from the current {@code formatter} and
 * {@code query} fields; clears it when no formatter is configured.
 */
private void resetHighlighter() {
    if (formatter != null) {
        this.highlighter = new Highlighter(formatter, new QueryScorer(query));
        // DefaultEncoder performs no HTML/entity escaping of fragment text.
        this.highlighter.setEncoder(new DefaultEncoder());
    } else {
        // No formatter -> highlighting disabled until one is set.
        this.highlighter = null;
    }
}
}
/** * Highlights chosen terms in a text, extracting the most relevant sections. The document text is analysed in chunks to record * hit statistics across the document. After accumulating stats, the fragments with the highest scores are returned as an * array of strings in order of score (contiguous fragments are merged into one in their original order to improve * readability) * * @param text * text to highlight terms in * @param maxNumFragments * the maximum number of fragments. * * @return highlighted text fragments (between 0 and maxNumFragments number of fragments) * @throws InvalidTokenOffsetsException * thrown if any token's endOffset exceeds the provided text's length */ public final String[] getBestFragments(TokenStream tokenStream, String text, int maxNumFragments) throws IOException, InvalidTokenOffsetsException { maxNumFragments = Math.max(1, maxNumFragments); // sanity check TextFragment[] frag = getBestTextFragments(tokenStream, text, true, maxNumFragments); // Get text ArrayList<String> fragTexts = new ArrayList<String>(); for (int i = 0; i < frag.length; i++) { if ((frag[i] != null) && (frag[i].getScore() > 0)) { fragTexts.add(frag[i].toString()); } } return fragTexts.toArray(new String[0]); }
highlighter.setTextFragmenter( new SimpleSpanFragmenter(scorer) ); analyzer2); String fragment = highlighter.getBestFragment(stream, title);
SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter(); Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query)); for (int i = 0; i < hits.length; i++) { int docId = hits[i].doc; Document d = searcher.doc(docId); String text = doc.get("contents"); String bestFrag = highlighter.getBestFragment(analyzer, "contents", text); //output, however you like.
Highlighter.DEFAULT_MAX_CHARS_TO_ANALYZE); if (maxCharsToAnalyze < 0) { highlighter.setMaxDocCharsToAnalyze(docTexts[j].length()); } else { highlighter.setMaxDocCharsToAnalyze(maxCharsToAnalyze); TextFragment[] bestTextFragments = highlighter.getBestTextFragments(tstream, docTexts[j], mergeContiguousFragments, numFragments); for (int k = 0; k < bestTextFragments.length; k++) { if ((bestTextFragments[k] != null) && (bestTextFragments[k].getScore() > 0)) {
/**
 * Highlights every stored field of every hit in {@code hits} against query
 * {@code q}, re-scoring the shared {@code highlighter} for this query.
 *
 * Token streams come either from indexed term vectors (when {@code termVecs})
 * or from re-analysis of the stored value. Fragment counts are accumulated
 * into {@code preventOptimizeAway} so the JIT cannot elide the work — this
 * reads like a benchmark harness; confirm before reusing elsewhere.
 *
 * Relies on enclosing-class fields: {@code highlighter}, {@code analyzer},
 * {@code termVecs}, {@code hlFields}, {@code maxDocCharsToAnalyze},
 * {@code maxFrags}, {@code preventOptimizeAway}.
 */
@Override
public void withTopDocs(IndexSearcher searcher, Query q, TopDocs hits) throws Exception {
    IndexReader reader = searcher.getIndexReader();
    highlighter.setFragmentScorer(new QueryScorer(q));
    // highlighter.setTextFragmenter(); unfortunately no sentence mechanism, not even regex. Default here is trivial
    for (ScoreDoc scoreDoc : docIdOrder(hits.scoreDocs)) {
        Document document = reader.document(scoreDoc.doc, hlFields);
        // Term vectors are fetched once per doc and reused for every field.
        Fields tvFields = termVecs ? reader.getTermVectors(scoreDoc.doc) : null;
        for (IndexableField indexableField : document) {
            TokenStream tokenStream;
            if (termVecs) {
                tokenStream = TokenSources.getTokenStream(indexableField.name(), tvFields, indexableField.stringValue(), analyzer, maxDocCharsToAnalyze);
            } else {
                tokenStream = analyzer.tokenStream(indexableField.name(), indexableField.stringValue());
            }
            // will close TokenStream:
            String[] fragments = highlighter.getBestFragments(tokenStream, indexableField.stringValue(), maxFrags);
            preventOptimizeAway = fragments.length;
        }
    }
}
}
protected final List<String> getBestFragments( Query query, Formatter formatter, TokenStream tokenStream, String text, int maxNumFragments ) throws IOException { Highlighter highlighter = new Highlighter( formatter, new CleaningEncoder(), new QueryScorer( query ) ); highlighter.setTextFragmenter( new OneLineFragmenter() ); maxNumFragments = Math.max( 1, maxNumFragments ); // sanity check TextFragment[] frag; // Get text ArrayList<String> fragTexts = new ArrayList<String>( maxNumFragments ); try { frag = highlighter.getBestTextFragments( tokenStream, text, false, maxNumFragments ); for ( int i = 0; i < frag.length; i++ ) { if ( ( frag[i] != null ) && ( frag[i].getScore() > 0 ) ) { fragTexts.add( frag[i].toString() ); } } } catch ( InvalidTokenOffsetsException e ) { // empty? } return fragTexts; }
Highlighter highlighter = new Highlighter(FORMATTER, scorer); highlighter.setTextFragmenter(new NullFragmenter()); highlightedDescMap.put(webSearchable, highlighter.getBestFragment( tokenStream, highlightString));