org.apache.solr.handler.clustering.carrot2.CarrotClusteringEngine java code examples

/**
 * Clusters the given Solr documents with the configured Carrot2 algorithm.
 *
 * @param query the query that produced the documents; its string form is
 *        passed to Carrot2 as the query attribute
 * @param solrDocList the documents to cluster
 * @param docIds mapping from Solr documents to integer ids, forwarded to
 *        {@code getDocuments}
 * @param sreq the current request; its parameters may override Carrot2 attributes
 * @return the clusters converted by {@code clustersToNamedList}
 * @throws SolrException with {@code SERVER_ERROR} if anything fails during clustering
 */
@Override
public Object cluster(Query query, SolrDocumentList solrDocList,
  Map<SolrDocument, Integer> docIds, SolrQueryRequest sreq) {
 try {
  // Assemble the attribute map handed over to the Carrot2 controller.
  final Map<String, Object> carrotAttributes = new HashMap<>();
  final List<Document> carrotDocuments = getDocuments(solrDocList, docIds, query, sreq);
  carrotAttributes.put(AttributeNames.DOCUMENTS, carrotDocuments);
  carrotAttributes.put(AttributeNames.QUERY, query.toString());

  // Names of the fields the clustering operates on.
  carrotAttributes.put("solrFieldNames", getFieldsForClustering(sreq));

  // Request-level parameters may override any of the attributes set so far.
  extractCarrotAttributes(sreq.getParams(), carrotAttributes);

  // Carrot2 looks up certain classes (e.g. custom tokenizer/stemmer) through
  // the current thread's context class loader. The core's resource loader is
  // installed for the duration of the clustering call so that classes from
  // contrib JARs are available.
  final ClassLoader coreClassLoader = core.getResourceLoader().getClassLoader();
  return withContextClassLoader(coreClassLoader, () -> {
   return clustersToNamedList(
     controller.process(carrotAttributes, clusteringAlgorithmClass).getClusters(),
     sreq.getParams());
  });
 } catch (Exception e) {
  log.error("Carrot2 clustering failed", e);
  throw new SolrException(ErrorCode.SERVER_ERROR, "Carrot2 clustering failed", e);
 }
}

/**
 * Collects the names of all fields this engine reads from a document: the
 * clustering fields, the id field, the URL field (parameter
 * {@code CarrotParams.URL_FIELD_NAME}, default {@code "url"}), the keys of the
 * custom fields map and, when configured and non-blank, the language field.
 *
 * @param sreq the current request whose parameters select the fields
 * @return a mutable set of field names
 */
@Override
protected Set<String> getFieldsToLoad(SolrQueryRequest sreq){
 final SolrParams params = sreq.getParams();

 final Set<String> fieldsToLoad = new HashSet<>(getFieldsForClustering(sreq));
 fieldsToLoad.add(idFieldName);

 final String urlField = params.get(CarrotParams.URL_FIELD_NAME, "url");
 fieldsToLoad.add(urlField);

 final Map<String, String> customFields = getCustomFieldsMap(params);
 fieldsToLoad.addAll(customFields.keySet());

 // The language field is optional; only request it when one was named.
 final String langField = params.get(CarrotParams.LANGUAGE_FIELD_NAME);
 final boolean hasLanguageField = StringUtils.isNotBlank(langField);
 if (hasLanguageField) {
  fieldsToLoad.add(langField);
 }
 return fieldsToLoad;
}

/**
 * Clusters the documents of the given {@code DocList} with the configured
 * Carrot2 algorithm.
 *
 * @param query the query that produced the documents; its string form is
 *        passed to Carrot2 as the query attribute
 * @param docList the documents to cluster
 * @param sreq the current request; its parameters may override Carrot2 attributes
 * @return the clusters converted by {@code clustersToNamedList}
 * @throws RuntimeException wrapping the original cause if anything fails
 *         during clustering
 */
public Object cluster(Query query, DocList docList, SolrQueryRequest sreq) {
 try {
  // Prepare attributes for Carrot2 clustering call
  Map<String, Object> attributes = new HashMap<String, Object>();
  List<Document> documents = getDocuments(docList, query, sreq);
  attributes.put(AttributeNames.DOCUMENTS, documents);
  attributes.put(AttributeNames.QUERY, query.toString());
  // Pass extra overriding attributes from the request, if any
  extractCarrotAttributes(sreq.getParams(), attributes);
  // Perform clustering and convert to named list
  return clustersToNamedList(controller.process(attributes,
      clusteringAlgorithmClass).getClusters(), sreq.getParams());
 } catch (Exception e) {
  log.error("Carrot2 clustering failed", e);
  // Keep the exception type, but attach a message saying what failed; the
  // original exception stays available as the cause.
  throw new RuntimeException("Carrot2 clustering failed", e);
 }
}

/**
 * Converts Carrot2 clusters into a list of named lists.
 *
 * <p>Sub-clusters are included unless {@code CarrotParams.OUTPUT_SUB_CLUSTERS}
 * is set to false; the number of labels per cluster is capped by
 * {@code CarrotParams.NUM_DESCRIPTIONS} (unlimited by default).
 *
 * @param carrotClusters the clusters produced by Carrot2
 * @param solrParams parameters controlling the output
 * @return one entry per top-level cluster
 */
private List<NamedList<Object>> clustersToNamedList(List<Cluster> carrotClusters,
                 SolrParams solrParams) {
 // Parameterized instead of raw List so callers get a typed result without
 // unchecked warnings; still assignable wherever a raw List was expected.
 List<NamedList<Object>> result = new ArrayList<NamedList<Object>>();
 clustersToNamedList(carrotClusters, result, solrParams.getBool(
     CarrotParams.OUTPUT_SUB_CLUSTERS, true), solrParams.getInt(
     CarrotParams.NUM_DESCRIPTIONS, Integer.MAX_VALUE));
 return result;
}

  withContextClassLoader(core.getResourceLoader().getClassLoader(), () -> {
   try {
    AttributeValueSets avs = AttributeValueSets.deserialize(attributeXmls[0].open());
extractCarrotAttributes(initParams, initAttributes);
withContextClassLoader(core.getResourceLoader().getClassLoader(), () -> this.controller.init(initAttributes));

/**
 * Initializes the engine: delegates to the superclass, initializes the Carrot2
 * controller with attributes extracted from the configuration, records the
 * schema's unique key field name, and resolves the clustering algorithm class
 * named by {@code CarrotParams.ALGORITHM}.
 *
 * @param config the engine configuration
 * @param core the Solr core this engine belongs to
 * @return whatever {@code super.init(config, core)} returned
 * @throws IllegalStateException if the schema has no unique key field
 * @throws IllegalArgumentException if the algorithm parameter is missing or
 *         names a class that does not implement {@code IClusteringAlgorithm}
 */
@Override
public String init(NamedList config, final SolrCore core) {
 String result = super.init(config, core);
 SolrParams initParams = SolrParams.toSolrParams(config);

 // Initialize Carrot2 controller. Pass initialization attributes, if any.
 HashMap<String, Object> initAttributes = new HashMap<String, Object>();
 extractCarrotAttributes(initParams, initAttributes);
 this.controller.init(initAttributes);

 // Clustered documents are reported by their unique key, so a schema
 // without one cannot be supported; fail with a clear message instead of
 // a NullPointerException.
 if (core.getSchema().getUniqueKeyField() == null) {
  throw new IllegalStateException(
      "Carrot2 clustering requires the schema to define a unique key field");
 }
 this.idFieldName = core.getSchema().getUniqueKeyField().getName();

 // Make sure the requested Carrot2 clustering algorithm class is available
 String carrotAlgorithmClassName = initParams.get(CarrotParams.ALGORITHM);
 if (carrotAlgorithmClassName == null) {
  throw new IllegalArgumentException("Missing required parameter "
      + CarrotParams.ALGORITHM);
 }
 Class<?> algorithmClass = core.getResourceLoader().findClass(carrotAlgorithmClassName);
 if (!IClusteringAlgorithm.class.isAssignableFrom(algorithmClass)) {
  throw new IllegalArgumentException("Class provided as "
      + CarrotParams.ALGORITHM + " must implement "
      + IClusteringAlgorithm.class.getName());
 }
 // asSubclass performs a checked narrowing, avoiding the unchecked cast.
 this.clusteringAlgorithmClass = algorithmClass.asSubclass(IClusteringAlgorithm.class);
 return result;
}

Map<String, String> customFields = getCustomFieldsMap(solrParams);
  snippet = getConcatenated(sdoc, snippetFieldSpec);
 Document carrotDocument = new Document(getConcatenated(sdoc, titleFieldSpec),
     snippet, ObjectUtils.toString(sdoc.getFieldValue(urlField), ""));

org.apache.lucene.document.Document doc = searcher.doc(id,
    fieldSelector);
String snippet = getValue(doc, snippetField);
if (produceSummary == true) {
 docsHolder[0] = id.intValue();
 highligher.doHighlighting(docAsList, theQuery, req, snippetFieldAry);
Document carrotDocument = new Document(getValue(doc, titleField),
    snippet, doc.get(urlField));
carrotDocument.addField("solrId", doc.get(idFieldName));

/**
 * Turns the clusters returned by Carrot2 into a list of named lists.
 *
 * @param carrotClusters clusters produced by the clustering algorithm
 * @param solrParams source of the output options: whether sub-clusters are
 *        emitted ({@code CarrotParams.OUTPUT_SUB_CLUSTERS}, default true) and
 *        the maximum number of labels per cluster
 *        ({@code CarrotParams.NUM_DESCRIPTIONS}, default unlimited)
 * @return one named list per top-level cluster
 */
private List<NamedList<Object>> clustersToNamedList(List<Cluster> carrotClusters,
                 SolrParams solrParams) {
 final boolean includeSubClusters =
     solrParams.getBool(CarrotParams.OUTPUT_SUB_CLUSTERS, true);
 final int labelLimit =
     solrParams.getInt(CarrotParams.NUM_DESCRIPTIONS, Integer.MAX_VALUE);

 final List<NamedList<Object>> converted = new ArrayList<>();
 clustersToNamedList(carrotClusters, converted, includeSubClusters, labelLimit);
 return converted;
}

/**
 * Recursively appends one named list per cluster to {@code parent}.
 *
 * <p>Each entry holds {@code "labels"} (the cluster phrases, truncated to
 * {@code maxLabels}), {@code "docs"} (the {@code "solrId"} field of every
 * document) and, when {@code outputSubClusters} is true, {@code "clusters"}
 * with the converted sub-clusters.
 *
 * @param outputClusters clusters to convert
 * @param parent list receiving the converted clusters
 * @param outputSubClusters whether sub-clusters are emitted as nested entries;
 *        when false, each cluster lists all of its documents via
 *        {@code getAllDocuments()} instead of {@code getDocuments()}
 * @param maxLabels maximum number of labels emitted per cluster
 */
private void clustersToNamedList(List<Cluster> outputClusters,
                 List<NamedList<Object>> parent, boolean outputSubClusters, int maxLabels) {
 for (Cluster outCluster : outputClusters) {
  NamedList<Object> cluster = new SimpleOrderedMap<Object>();
  parent.add(cluster);

  List<String> labels = outCluster.getPhrases();
  if (labels.size() > maxLabels) {
   labels = labels.subList(0, maxLabels);
  }
  cluster.add("labels", labels);

  List<Document> docs = outputSubClusters ? outCluster.getDocuments() : outCluster.getAllDocuments();
  List<Object> docList = new ArrayList<Object>();
  cluster.add("docs", docList);
  for (Document doc : docs) {
   docList.add(doc.getField("solrId"));
  }

  if (outputSubClusters) {
   List<NamedList<Object>> subclusters = new ArrayList<NamedList<Object>>();
   cluster.add("clusters", subclusters);
   clustersToNamedList(outCluster.getSubclusters(), subclusters,
       outputSubClusters, maxLabels);
  }
 }
}

List<NamedList<Object>> subclusters = new ArrayList<>();
cluster.add("clusters", subclusters);
clustersToNamedList(outCluster.getSubclusters(), subclusters,
    outputSubClusters, maxLabels);

Javadoc

Search results clustering engine based on Carrot2 clustering algorithms.

Output from this class is subject to change.

Most used methods

clustersToNamedList
extractCarrotAttributes
Extracts parameters that can possibly match some attributes of Carrot2 algorithms.
getDocuments
Prepares Carrot2 documents for clustering.
getConcatenated
getCustomFieldsMap
Prepares a map of Solr field names (keys) to the corresponding Carrot2 custom field names.
getFieldsForClustering
Returns the names of fields that will be delivering the actual content for clustering. Currently, th
getValue
withContextClassLoader

Popular in Java

Creating JSON documents from java classes using gson
notifyDataSetChanged (ArrayAdapter)
compareTo (BigDecimal)
setScale (BigDecimal)
FileNotFoundException (java.io)
Thrown when a file specified by a program cannot be found.
InetAddress (java.net)
An Internet Protocol (IP) address. This can be either an IPv4 address or an IPv6 address, and in pra
Collections (java.util)
This class consists exclusively of static methods that operate on or return collections. It contains
Locale (java.util)
Locale represents a language/country/variant combination. Locales are used to alter the presentatio
PriorityQueue (java.util)
A PriorityQueue holds elements on a priority heap, which orders the elements according to their natu
FlowLayout (java.awt)
A flow layout arranges components in a left-to-right flow, much like lines of text in a paragraph. F
From CI to AI: The AI layer in your organization

How to use CarrotClusteringEngine in org.apache.solr.handler.clustering.carrot2

Best Java code snippets using org.apache.solr.handler.clustering.carrot2.CarrotClusteringEngine (Showing top 11 results out of 315)

How to use
CarrotClusteringEngine
in
org.apache.solr.handler.clustering.carrot2