// Union cc's document set into the equivalent candidate's document set (in place).
equivalent.documents.or(cc.documents);
/** * Create the junk (unassigned documents) cluster and create the final * set of clusters in Carrot2 format. */ private void postProcessing(List<ClusterCandidate> clusters) { // Adapt to Carrot2 classes, counting used documents on the way. final BitSet all = new BitSet(documents.size()); final ArrayList<Document> docs = Lists.newArrayListWithCapacity(documents.size()); final ArrayList<String> phrases = Lists.newArrayListWithCapacity(3); for (ClusterCandidate c : clusters) { final Cluster c2 = new Cluster(); c2.addPhrases(collectPhrases(phrases, c)); c2.addDocuments(collectDocuments(docs, c.documents)); c2.setScore((double) c.score); this.clusters.add(c2); all.or(c.documents); docs.clear(); phrases.clear(); } Collections.sort(this.clusters, Cluster.byReversedWeightedScoreAndSizeComparator(scoreWeight)); Cluster.appendOtherTopics(this.documents, this.clusters); }
// Fold cc into the running result: union its documents and add its score.
result.documents.or(cc.documents); result.score += cc.score;
public boolean isArcPresent(int clusterA, int clusterB) { temp.clear(); int size; BitSet setA = clusterDocuments[clusterA]; BitSet setB = clusterDocuments[clusterB]; // Suitable for flat clustering // A small subgroup contained within a bigger group // will give small overlap ratio. Big ratios will // be produced only for balanced group sizes. if (setA.cardinality() < setB.cardinality()) { // addAll == or // reiatinAll == and | intersect temp.or(setA); temp.intersect(setB); size = (int) setB.cardinality(); } else { temp.or(setB); temp.intersect(setA); size = (int) setA.cardinality(); } return temp.cardinality() / (double) size >= clusterMergingThreshold; } }, true);
// Union this cluster's documents into the merge-base cluster, unless it is the base itself.
// NOTE(review): the if has no braces, so as written only the or(...) call is guarded;
// the label feature index is set to -1 for every clusterIndex, including the merge base.
// Confirm this is intended.
if (clusterIndex != mergeBaseClusterIndex) clusterDocuments[mergeBaseClusterIndex].or( clusterDocuments[clusterIndex]); clusterLabelFeatureIndex[clusterIndex] = -1;
// Recurse into the child state one level deeper, then union the child's bits into me.
// NOTE(review): edges.discard(2) presumably removes the two entries belonging to this
// child edge once it has been processed — confirm against the edges container.
countDocs(level + 1, childState); edges.discard(2); me.or(child);