org.apache.lucene.search.highlight.QueryTermExtractor.getTerms java code examples

/**
 * Extracts all terms texts of a given Query into an array of WeightedTerms
 *
 * @param query      Query to extract term texts from
 * @return an array of the terms used in a query, plus their weights.
 */
public static final WeightedTerm[] getTerms(Query query)
{
 // Convenience overload: terms from prohibited clauses are not requested.
 final boolean includeProhibited = false;
 return getTerms(query, includeProhibited);
}

private static final void getTerms(Query query, float boost, HashSet<WeightedTerm> terms, boolean prohibited, String fieldName) {
 // Recursive worker: unwraps the container query types it knows about and
 // collects the terms of everything else into the supplied set.
 try {
  if (query instanceof BoostQuery) {
   // Fold the wrapper's boost into the running boost, then descend into the wrapped query.
   BoostQuery wrapper = (BoostQuery) query;
   getTerms(wrapper.getQuery(), boost * wrapper.getBoost(), terms, prohibited, fieldName);
   return;
  }
  if (query instanceof BooleanQuery) {
   getTermsFromBooleanQuery((BooleanQuery) query, boost, terms, prohibited, fieldName);
   return;
  }
  if (query instanceof FilteredQuery) {
   getTermsFromFilteredQuery((FilteredQuery) query, boost, terms, prohibited, fieldName);
   return;
  }

  // Any other query type: ask its weight (built against the empty searcher) for its terms.
  HashSet<Term> extracted = new HashSet<>();
  try {
   EMPTY_INDEXSEARCHER.createNormalizedWeight(query, false).extractTerms(extracted);
  } catch (IOException bogus) {
   throw new RuntimeException("Should not happen on an empty index", bogus);
  }
  for (Term candidate : extracted) {
   // A null fieldName means "accept terms from every field".
   if (fieldName != null && !candidate.field().equals(fieldName)) {
    continue;
   }
   terms.add(new WeightedTerm(boost, candidate.text()));
  }
 } catch (UnsupportedOperationException ignore) {
  // Non-fatal for our purposes: the query simply contributes no terms.
 }
}

/**
 * 
 * @param query a Lucene query (ideally rewritten using query.rewrite before
 *        being passed to this class and the searcher)
 * @param fieldName the Field name which is used to match Query terms
 */
public QueryTermScorer(Query query, String fieldName) {
 // Extract the query's weighted terms (prohibited flag false, filtered by
 // fieldName) and hand them to another constructor of this class.
 this(QueryTermExtractor.getTerms(query, false, fieldName));
}

/**
 * 
 * @param query a Lucene query (ideally rewritten using query.rewrite before
 *        being passed to this class and the searcher)
 */
public QueryTermScorer(Query query) {
 // Extract the query's weighted terms via the single-argument getTerms
 // overload and hand them to another constructor of this class.
 this(QueryTermExtractor.getTerms(query));
}

/**
 * Extracts all terms texts of a given Query into an array of WeightedTerms
 *
 * @param query      Query to extract term texts from
 * @param prohibited <code>true</code> to extract "prohibited" terms, too
 * @return an array of the terms used in a query, plus their weights.
 */
public static final WeightedTerm[] getTerms(Query query, boolean prohibited)
{
  // No field name is supplied, so the three-argument overload receives null as its filter.
  final String noFieldFilter = null;
  return getTerms(query, prohibited, noFieldFilter);
}

/**
 * Extracts all terms texts of a given Query into an array of WeightedTerms
 *
 * @param query      Query to extract term texts from
 * @param prohibited <code>true</code> to extract "prohibited" terms, too
 * @param fieldName  The fieldName used to filter query terms
 * @return an array of the terms used in a query, plus their weights.
 */
public static final WeightedTerm[] getTerms(Query query, boolean prohibited, String fieldName)
{
 // Collect into a set so that equal weighted terms are stored only once.
 final HashSet<WeightedTerm> collected = new HashSet<>();
 // Start the recursive extraction with a neutral boost of 1.
 getTerms(query, 1f, collected, prohibited, fieldName);
 final WeightedTerm[] result = new WeightedTerm[collected.size()];
 return collected.toArray(result);
}

private static void getTermsFromFilteredQuery(FilteredQuery query, float boost, HashSet<WeightedTerm> terms, boolean prohibited, String fieldName)
{
 // Only the wrapped query is inspected for terms; boost and flags pass through unchanged.
 final Query wrapped = query.getQuery();
 getTerms(wrapped, boost, terms, prohibited, fieldName);
}

/**
 * Extracts search terms and their weights
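 * @param query the query whose terms are extracted (only terms of the Field.CONTENTS field are kept)
 * @return a map from term text to the weight assigned to that term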
 */
private static Map<String, Float> extractTerms(Query query)
{
  // Restrict extraction to the contents field and leave out prohibited terms.
  final String contentsField = Field.CONTENTS.toString();
  final WeightedTerm[] extracted = QueryTermExtractor.getTerms(query, false, contentsField);

  // Presize the map using the number of extracted terms.
  final Map<String,Float> weightsByTerm = new HashMap<String,Float>(extracted.length);
  for (int i = 0; i < extracted.length; i++)
  {
    final WeightedTerm current = extracted[i];
    weightsByTerm.put(current.getTerm(), current.getWeight());
  }

  return weightsByTerm;
}

/**
 * extractTerms is currently the only query-independent means of introspecting queries but it only reveals
 * a list of terms for that query - not the boosts each individual term in that query may or may not have.
 * "Container" queries such as BooleanQuery should be unwrapped to get at the boost info held
 * in each child element.
 * Some discussion around this topic here:
 * http://www.gossamer-threads.com/lists/lucene/java-dev/34208?search_string=introspection;#34208
 * Unfortunately there seemed to be limited interest in requiring all Query objects to implement
 * something common which would allow access to child queries so what follows here are query-specific
 * implementations for accessing embedded query elements.
 */
private static final void getTermsFromBooleanQuery(BooleanQuery query, float boost, HashSet<WeightedTerm> terms, boolean prohibited, String fieldName)
{
 for (BooleanClause child : query)
 {
  // Skip MUST_NOT clauses unless the caller asked for prohibited terms as well.
  if (!prohibited && child.getOccur() == BooleanClause.Occur.MUST_NOT)
  {
   continue;
  }
  getTerms(child.getQuery(), boost, terms, prohibited, fieldName);
 }
}

/**
 * Extracts all terms texts of a given Query into an array of WeightedTerms
 *
 * @param query      Query to extract term texts from
 * @param reader used to compute IDF which can be used to a) score selected fragments better
 * b) use graded highlights eg changing intensity of font color
 * @param fieldName the field on which Inverse Document Frequency (IDF) calculations are based
 * @return an array of the terms used in a query, plus their weights.
 */
public static final WeightedTerm[] getIdfWeightedTerms(Query query, IndexReader reader, String fieldName)
{
  // Start from the plain (non-prohibited) terms of the given field.
  final WeightedTerm[] weighted = getTerms(query, false, fieldName);
  final int maxDoc = reader.maxDoc();
  for (WeightedTerm entry : weighted)
  {
    try
    {
      final int df = reader.docFreq(new Term(fieldName, entry.term));
      // IDF algorithm taken from DefaultSimilarity class; the +1 keeps the divisor non-zero.
      final double ratio = maxDoc / (double) (df + 1);
      final float idf = (float) (Math.log(ratio) + 1.0);
      entry.weight *= idf;
    }
    catch (IOException e)
    {
      // Ignored: a term whose document frequency cannot be read keeps its original weight.
    }
  }
  return weighted;
}

WeightedTerm[] wts = QueryTermExtractor.getTerms(rewritten);
Set<String> terms = new HashSet<>();
for (WeightedTerm wt: wts) {

Javadoc

Extracts all terms texts of a given Query into an array of WeightedTerms

Popular methods of QueryTermExtractor

getIdfWeightedTerms
Extracts all terms texts of a given Query into an array of WeightedTerms
getTermsFromBooleanQuery
extractTerms is currently the only query-independent means of introspecting queries but it only reve
getTermsFromFilteredQuery

Popular in Java

Reading from database using SQL prepared statement
putExtra (Intent)
getSupportFragmentManager (FragmentActivity)
addToBackStack (FragmentTransaction)
FileWriter (java.io)
A specialized Writer that writes to a file in the file system. All write requests made by calling me
Thread (java.lang)
A thread is a thread of execution in a program. The Java Virtual Machine allows an application to ha
SecureRandom (java.security)
This class generates cryptographically secure pseudo-random numbers. It is best to invoke SecureRand
SQLException (java.sql)
An exception that indicates a failed JDBC operation. It provides the following information about pro
HttpServlet (javax.servlet.http)
Provides an abstract class to be subclassed to create an HTTP servlet suitable for a Web site. A sub
Scheduler (org.quartz)
This is the main interface of a Quartz Scheduler. A Scheduler maintains a registry of org.quartz.Job
Github Copilot alternatives

How to use the getTerms method in org.apache.lucene.search.highlight.QueryTermExtractor

Best Java code snippets using org.apache.lucene.search.highlight.QueryTermExtractor.getTerms (Showing top 11 results out of 315)

How to use
getTerms
method
in
org.apache.lucene.search.highlight.QueryTermExtractor