@Override public Object cluster(Query query, SolrDocumentList solrDocList, Map<SolrDocument, Integer> docIds, SolrQueryRequest sreq) { try { // Prepare attributes for Carrot2 clustering call Map<String, Object> attributes = new HashMap<>(); List<Document> documents = getDocuments(solrDocList, docIds, query, sreq); attributes.put(AttributeNames.DOCUMENTS, documents); attributes.put(AttributeNames.QUERY, query.toString()); // Pass the fields on which clustering runs. attributes.put("solrFieldNames", getFieldsForClustering(sreq)); // Pass extra overriding attributes from the request, if any extractCarrotAttributes(sreq.getParams(), attributes); // Perform clustering and convert to an output structure of clusters. // // Carrot2 uses current thread's context class loader to get // certain classes (e.g. custom tokenizer/stemmer) at runtime. // To make sure classes from contrib JARs are available, // we swap the context class loader for the time of clustering. return withContextClassLoader(core.getResourceLoader().getClassLoader(), () -> clustersToNamedList(controller.process(attributes, clusteringAlgorithmClass).getClusters(), sreq.getParams())); } catch (Exception e) { log.error("Carrot2 clustering failed", e); throw new SolrException(ErrorCode.SERVER_ERROR, "Carrot2 clustering failed", e); } }
/**
 * Collects the names of all Solr fields that have to be loaded for clustering.
 *
 * <p>The result contains the clustering fields, the id field, the URL field
 * (request parameter {@code CarrotParams.URL_FIELD_NAME}, default {@code "url"}),
 * the keys of the custom fields map and, when configured and not blank, the
 * language field.
 *
 * @param sreq the current request
 * @return a new mutable set of field names
 */
@Override
protected Set<String> getFieldsToLoad(SolrQueryRequest sreq) {
  final SolrParams requestParams = sreq.getParams();

  // Start from the fields clustering runs on, then add the auxiliary ones.
  final Set<String> toLoad = new HashSet<>(getFieldsForClustering(sreq));
  toLoad.add(idFieldName);
  toLoad.add(requestParams.get(CarrotParams.URL_FIELD_NAME, "url"));
  toLoad.addAll(getCustomFieldsMap(requestParams).keySet());

  // The language field is optional; only load it when one was actually given.
  final String langFieldName = requestParams.get(CarrotParams.LANGUAGE_FIELD_NAME);
  if (StringUtils.isNotBlank(langFieldName)) {
    toLoad.add(langFieldName);
  }
  return toLoad;
}
public Object cluster(Query query, DocList docList, SolrQueryRequest sreq) { try { // Prepare attributes for Carrot2 clustering call Map<String, Object> attributes = new HashMap<String, Object>(); List<Document> documents = getDocuments(docList, query, sreq); attributes.put(AttributeNames.DOCUMENTS, documents); attributes.put(AttributeNames.QUERY, query.toString()); // Pass extra overriding attributes from the request, if any extractCarrotAttributes(sreq.getParams(), attributes); // Perform clustering and convert to named list return clustersToNamedList(controller.process(attributes, clusteringAlgorithmClass).getClusters(), sreq.getParams()); } catch (Exception e) { log.error("Carrot2 clustering failed", e); throw new RuntimeException(e); } }
/**
 * Converts Carrot2 clusters into a list of named lists, one per cluster.
 *
 * <p>Sub-cluster output and the maximum number of labels per cluster are read
 * from the request parameters {@code CarrotParams.OUTPUT_SUB_CLUSTERS}
 * (default {@code true}) and {@code CarrotParams.NUM_DESCRIPTIONS}
 * (default {@code Integer.MAX_VALUE}).
 *
 * @param carrotClusters clusters produced by Carrot2
 * @param solrParams     request parameters controlling the output
 * @return a new list with one entry per top-level cluster
 */
private List<NamedList<Object>> clustersToNamedList(List<Cluster> carrotClusters,
    SolrParams solrParams) {
  // Parameterized instead of raw List/ArrayList so that element types are
  // checked by the compiler; callers treating the result as a plain List
  // or Object are unaffected.
  List<NamedList<Object>> result = new ArrayList<>();
  clustersToNamedList(carrotClusters, result,
      solrParams.getBool(CarrotParams.OUTPUT_SUB_CLUSTERS, true),
      solrParams.getInt(CarrotParams.NUM_DESCRIPTIONS, Integer.MAX_VALUE));
  return result;
}
withContextClassLoader(core.getResourceLoader().getClassLoader(), () -> { try { AttributeValueSets avs = AttributeValueSets.deserialize(attributeXmls[0].open()); extractCarrotAttributes(initParams, initAttributes); withContextClassLoader(core.getResourceLoader().getClassLoader(), () -> this.controller.init(initAttributes));
@Override public String init(NamedList config, final SolrCore core) { String result = super.init(config, core); SolrParams initParams = SolrParams.toSolrParams(config); // Initialize Carrot2 controller. Pass initialization attributes, if any. HashMap<String, Object> initAttributes = new HashMap<String, Object>(); extractCarrotAttributes(initParams, initAttributes); this.controller.init(initAttributes); this.idFieldName = core.getSchema().getUniqueKeyField().getName(); // Make sure the requested Carrot2 clustering algorithm class is available String carrotAlgorithmClassName = initParams.get(CarrotParams.ALGORITHM); Class<?> algorithmClass = core.getResourceLoader().findClass(carrotAlgorithmClassName); if (!IClusteringAlgorithm.class.isAssignableFrom(algorithmClass)) { throw new IllegalArgumentException("Class provided as " + CarrotParams.ALGORITHM + " must implement " + IClusteringAlgorithm.class.getName()); } this.clusteringAlgorithmClass = (Class<? extends IClusteringAlgorithm>) algorithmClass; return result; }
Map<String, String> customFields = getCustomFieldsMap(solrParams); snippet = getConcatenated(sdoc, snippetFieldSpec); Document carrotDocument = new Document(getConcatenated(sdoc, titleFieldSpec), snippet, ObjectUtils.toString(sdoc.getFieldValue(urlField), ""));
org.apache.lucene.document.Document doc = searcher.doc(id, fieldSelector); String snippet = getValue(doc, snippetField); if (produceSummary == true) { docsHolder[0] = id.intValue(); highligher.doHighlighting(docAsList, theQuery, req, snippetFieldAry); Document carrotDocument = new Document(getValue(doc, titleField), snippet, doc.get(urlField)); carrotDocument.addField("solrId", doc.get(idFieldName));
/**
 * Builds the response structure for a list of Carrot2 clusters.
 *
 * <p>Reads {@code CarrotParams.OUTPUT_SUB_CLUSTERS} (default {@code true}) and
 * {@code CarrotParams.NUM_DESCRIPTIONS} (default {@code Integer.MAX_VALUE})
 * from the request parameters and delegates to the recursive overload.
 *
 * @param carrotClusters clusters produced by Carrot2
 * @param solrParams     request parameters controlling the output
 * @return a new list with one named list per top-level cluster
 */
private List<NamedList<Object>> clustersToNamedList(List<Cluster> carrotClusters,
    SolrParams solrParams) {
  final List<NamedList<Object>> converted = new ArrayList<>();

  // Resolve the output options once, then let the recursive overload fill the list.
  final boolean includeSubClusters =
      solrParams.getBool(CarrotParams.OUTPUT_SUB_CLUSTERS, true);
  final int labelLimit =
      solrParams.getInt(CarrotParams.NUM_DESCRIPTIONS, Integer.MAX_VALUE);

  clustersToNamedList(carrotClusters, converted, includeSubClusters, labelLimit);
  return converted;
}
/**
 * Recursively converts Carrot2 clusters into named lists and appends them to
 * {@code parent}.
 *
 * <p>Each cluster entry holds {@code "labels"} (at most {@code maxLabels}
 * phrases), {@code "docs"} (the {@code "solrId"} field of each document) and,
 * when {@code outputSubClusters} is set, {@code "clusters"} with the converted
 * sub-clusters.
 *
 * @param outputClusters    clusters to convert
 * @param parent            list receiving one entry per cluster
 * @param outputSubClusters if {@code true}, only the cluster's own documents
 *                          are listed and sub-clusters are emitted recursively;
 *                          otherwise all documents are listed and no
 *                          sub-clusters are emitted
 * @param maxLabels         maximum number of labels to output per cluster
 */
private void clustersToNamedList(List<Cluster> outputClusters,
    List<NamedList<Object>> parent, boolean outputSubClusters, int maxLabels) {
  for (Cluster outCluster : outputClusters) {
    NamedList<Object> cluster = new SimpleOrderedMap<>();
    parent.add(cluster);

    // Labels, truncated to the requested maximum.
    List<String> labels = outCluster.getPhrases();
    if (labels.size() > maxLabels) {
      labels = labels.subList(0, maxLabels);
    }
    cluster.add("labels", labels);

    // Documents: without sub-cluster output, getAllDocuments() is used so the
    // cluster lists everything it returns in a single flat list.
    List<Document> docs = outputSubClusters
        ? outCluster.getDocuments()
        : outCluster.getAllDocuments();
    List<Object> docList = new ArrayList<>();
    cluster.add("docs", docList);
    for (Document doc : docs) {
      docList.add(doc.getField("solrId"));
    }

    // Sub-clusters, converted with the same settings.
    if (outputSubClusters) {
      List<NamedList<Object>> subclusters = new ArrayList<>();
      cluster.add("clusters", subclusters);
      clustersToNamedList(outCluster.getSubclusters(), subclusters,
          outputSubClusters, maxLabels);
    }
  }
}
List<NamedList<Object>> subclusters = new ArrayList<>(); cluster.add("clusters", subclusters); clustersToNamedList(outCluster.getSubclusters(), subclusters, outputSubClusters, maxLabels);