// Returns true when the list returned by cluster.getSubclusters() is not empty,
// i.e. the given cluster has at least one subcluster; returns false otherwise.
public boolean apply(Cluster cluster) { return !cluster.getSubclusters().isEmpty(); } });
/**
 * Walks the cluster hierarchy depth-first and appends every visited cluster to
 * {@code flat}. A cluster is always appended before any of its subclusters.
 *
 * @param hierarchical clusters to start the traversal from
 * @param flat list that receives the visited clusters; it is modified in place
 * @return the {@code flat} instance that was passed in
 */
private static List<Cluster> flatten(Collection<Cluster> hierarchical, List<Cluster> flat)
{
    for (final Cluster current : hierarchical)
    {
        // Parent first, then everything below it.
        flat.add(current);

        final List<Cluster> children = current.getSubclusters();
        flatten(children, flat);
    }

    return flat;
}
/**
 * Searches {@code clusters} and, recursively, their subclusters for a cluster whose
 * identifier equals {@code id}. The traversal is depth-first: a cluster is checked
 * first, then its subclusters, then the next sibling. {@code null} entries in the
 * collection are skipped, as are clusters without an identifier.
 *
 * @param id identifier to look for
 * @param clusters clusters to search
 * @return the first matching cluster in traversal order, or {@code null} if there is
 *         no match
 */
public static Cluster find(int id, Collection<Cluster> clusters)
{
    for (final Cluster candidate : clusters)
    {
        if (candidate == null)
        {
            continue;
        }

        // The identifier may be unset, so guard before the numeric comparison.
        if (candidate.id != null && candidate.id == id)
        {
            return candidate;
        }

        final List<Cluster> children = candidate.getSubclusters();
        if (children.isEmpty())
        {
            continue;
        }

        final Cluster nested = find(id, children);
        if (nested != null)
        {
            return nested;
        }
    }

    return null;
}
/**
 * Builds a short textual summary consisting of the value of {@code getLabel()}, the
 * value of {@code size()} and the number of direct subclusters.
 */
@Override
public String toString()
{
    final StringBuilder text = new StringBuilder("[Cluster, label: ");
    text.append(getLabel());
    text.append(", docs: ").append(size());
    text.append(", subclusters: ").append(getSubclusters().size());
    text.append("]");
    return text.toString();
}
}
/**
 * Resolves the document reference ids stored in {@code cluster.documentIds} to
 * {@link Document} instances and adds them to the cluster, then repeats the same
 * procedure for every subcluster. Used upon deserialization.
 * <p>
 * The result of each map lookup is handed to {@code addDocuments} as is; there is no
 * check for reference ids that are absent from {@code documents}.
 *
 * @param cluster cluster whose document reference ids are to be resolved
 * @param documents documents keyed by reference id
 */
private void documentIdToReference(Cluster cluster, Map<String, Document> documents)
{
    if (cluster.documentIds != null)
    {
        for (Cluster.DocumentRefid reference : cluster.documentIds)
        {
            final Document resolved = documents.get(reference.refid);
            cluster.addDocuments(resolved);
        }
    }

    // Descend regardless of whether this level carried any reference ids.
    for (Cluster child : cluster.getSubclusters())
    {
        documentIdToReference(child, documents);
    }
}
/**
 * Recursively gathers the documents of {@code cluster} and of all its subclusters
 * into {@code docs}. Uniqueness follows from {@code docs} being a {@link Set}.
 *
 * @param cluster root of the subtree to collect from; {@code null} is a no-op
 * @param docs set that receives the documents; it is modified in place
 * @return the {@code docs} instance that was passed in
 */
private static Set<Document> collectAllDocuments(Cluster cluster, Set<Document> docs)
{
    if (cluster != null)
    {
        docs.addAll(cluster.getDocuments());

        for (final Cluster child : cluster.getSubclusters())
        {
            collectAllDocuments(child, docs);
        }
    }

    return docs;
}
/**
 * Converts clusters into named-list entries appended to {@code parent}. Every entry
 * holds, in this order:
 * <ul>
 * <li>{@code "labels"}: the cluster phrases, limited to the first {@code maxLabels},</li>
 * <li>{@code "docs"}: the {@code "solrId"} field of each document,</li>
 * <li>{@code "clusters"}: converted subclusters, present only when
 * {@code outputSubClusters} is {@code true}.</li>
 * </ul>
 * When subclusters are output, {@code "docs"} is built from {@code getDocuments()};
 * otherwise it is built from {@code getAllDocuments()}.
 *
 * @param outputClusters clusters to convert
 * @param parent list that receives one entry per cluster
 * @param outputSubClusters whether to recurse into subclusters
 * @param maxLabels maximum number of labels to output per cluster
 */
private void clustersToNamedList(List<Cluster> outputClusters, List parent,
    boolean outputSubClusters, int maxLabels)
{
    for (Cluster source : outputClusters)
    {
        NamedList entry = new SimpleOrderedMap();
        parent.add(entry);

        // Labels, truncated to the requested maximum.
        List<String> phrases = source.getPhrases();
        if (phrases.size() > maxLabels)
        {
            phrases = phrases.subList(0, maxLabels);
        }
        entry.add("labels", phrases);

        // Document identifiers.
        List<Document> members;
        if (outputSubClusters)
        {
            members = source.getDocuments();
        }
        else
        {
            members = source.getAllDocuments();
        }
        List identifiers = new ArrayList();
        entry.add("docs", identifiers);
        for (Document member : members)
        {
            identifiers.add(member.getField("solrId"));
        }

        // Nested clusters, only on request.
        if (outputSubClusters)
        {
            List children = new ArrayList();
            entry.add("clusters", children);
            clustersToNamedList(source.getSubclusters(), children, outputSubClusters,
                maxLabels);
        }
    }
}
/**
 * Produces a copy of the given cluster hierarchy. For each input cluster a new
 * {@code Cluster} is created and given the original's phrases, the original's
 * documents passed through {@code Iterables.filter} with {@code docFilter}, and
 * recursively copied subclusters. Nothing else is transferred by this method.
 *
 * @param in clusters to copy
 * @param docFilter predicate applied to the documents of every cluster
 * @return newly created clusters, in the same order as {@code in}
 */
private static List<Cluster> sanityCheck(List<Cluster> in, Predicate<Document> docFilter)
{
    final List<Cluster> result = Lists.newArrayListWithCapacity(in.size());

    for (final Cluster original : in)
    {
        final Cluster copy = new Cluster();
        copy.addPhrases(original.getPhrases());

        final Iterable<Document> kept = Iterables.filter(original.getDocuments(), docFilter);
        copy.addDocuments(kept);

        final List<Cluster> nested = sanityCheck(original.getSubclusters(), docFilter);
        copy.addSubclusters(nested);

        result.add(copy);
    }

    return result;
}
}
if (outputSubClusters && !outCluster.getSubclusters().isEmpty()) { List<NamedList<Object>> subclusters = new ArrayList<>(); cluster.add("clusters", subclusters); clustersToNamedList(outCluster.getSubclusters(), subclusters, outputSubClusters, maxLabels);
for (Cluster cluster : clusters) final List<Cluster> subclusters = cluster.getSubclusters(); for (Cluster subcluster : subclusters) flattenedClusters.addAll(majorLanguageCluster.getSubclusters());
clusterLabel = subcluster.getPhrases().get(0); cluster.addDocuments(subcluster.getDocuments()); cluster.addSubclusters(subcluster.getSubclusters());