// For every topic, print one line: the topic index, the topic's probability for
// document docID, and up to five of the topic's words with their weights.
for (int topic = 0; topic < numTopics; topic++) {
    // A fresh formatter per topic so each printed line starts empty.
    out = new Formatter(new StringBuilder(), Locale.US);
    out.format("%d\t%.3f\t", topic, model.getTopicProbabilities(docID)[topic]);

    // Walk the topic's word set in its iteration order, stopping after five entries.
    Iterator<IDSorter> words = topicSortedWords.get(topic).iterator();
    for (int printed = 0; words.hasNext() && printed < 5; printed++) {
        IDSorter entry = words.next();
        Object word = dataAlphabet.lookupObject(entry.getID());
        out.format("%s (%.3f) ", word, entry.getWeight());
    }

    System.out.println(out);
}
// Trailing separator after the last topic line.
System.out.println("\n");
/**
 * Builds a plain-text report with one line per topic: the topic index, the
 * topic's token count and the topic's leading words.
 *
 * <p>Each line is {@code topic TAB tokensPerTopic[topic] TAB word word ... NEWLINE}.
 * Words are read from the front of the array after {@code Arrays.sort}, so the
 * ranking is whatever {@code IDSorter}'s natural ordering defines (presumably
 * highest weight first -- confirm against {@code IDSorter}).
 *
 * @param numWords maximum number of words to list per topic; a value larger
 *                 than {@code numTypes} is clamped to {@code numTypes}
 * @return the report, one newline-terminated line per topic
 */
public String topWords (int numWords) {
    StringBuilder output = new StringBuilder();
    IDSorter[] sortedWords = new IDSorter[numTypes];

    // Requesting more words than there are types used to index past the end
    // of sortedWords; clamp the request to what is actually available.
    int wordLimit = Math.min(numWords, sortedWords.length);

    for (int topic = 0; topic < numTopics; topic++) {
        // Refill the shared scratch array with this topic's per-type counts.
        for (int type = 0; type < numTypes; type++) {
            sortedWords[type] = new IDSorter(type, typeTopicCounts[type][topic]);
        }
        Arrays.sort(sortedWords);

        output.append(topic).append("\t").append(tokensPerTopic[topic]).append("\t");
        for (int i = 0; i < wordLimit; i++) {
            output.append(alphabet.lookupObject(sortedWords[i].getID())).append(" ");
        }
        output.append("\n");
    }

    return output.toString();
}
/**
 * Builds a plain-text report with one line per topic: the topic index, the
 * topic's token count and the topic's leading words.
 *
 * <p>Each line is {@code topic TAB tokensPerTopic[topic] TAB word word ... NEWLINE}.
 * Words are read from the front of the array after {@code Arrays.sort}, so the
 * ranking is whatever {@code IDSorter}'s natural ordering defines (presumably
 * highest weight first -- confirm against {@code IDSorter}).
 *
 * @param numWords maximum number of words to list per topic; a value larger
 *                 than {@code numTypes} is clamped to {@code numTypes}
 * @return the report, one newline-terminated line per topic
 */
public String topWords (int numWords) {
    StringBuilder output = new StringBuilder();
    IDSorter[] sortedWords = new IDSorter[numTypes];

    // Requesting more words than there are types used to index past the end
    // of sortedWords; clamp the request to what is actually available.
    int wordLimit = Math.min(numWords, sortedWords.length);

    for (int topic = 0; topic < numTopics; topic++) {
        // Refill the shared scratch array with this topic's per-type counts.
        for (int type = 0; type < numTypes; type++) {
            sortedWords[type] = new IDSorter(type, typeTopicCounts[type][topic]);
        }
        Arrays.sort(sortedWords);

        output.append(topic).append("\t").append(tokensPerTopic[topic]).append("\t");
        for (int i = 0; i < wordLimit; i++) {
            output.append(alphabet.lookupObject(sortedWords[i].getID())).append(" ");
        }
        output.append("\n");
    }

    return output.toString();
}
/**
 * Builds a plain-text report with one line per topic: the topic index, the
 * topic's token count and the topic's leading words.
 *
 * <p>Each line is {@code topic TAB tokensPerTopic[topic] TAB word word ... NEWLINE}.
 * Words are read from the front of the array after {@code Arrays.sort}, so the
 * ranking is whatever {@code IDSorter}'s natural ordering defines (presumably
 * highest weight first -- confirm against {@code IDSorter}).
 *
 * @param numWords maximum number of words to list per topic; a value larger
 *                 than {@code numTypes} is clamped to {@code numTypes}
 * @return the report, one newline-terminated line per topic
 */
public String topWords (int numWords) {
    StringBuilder output = new StringBuilder();
    IDSorter[] sortedWords = new IDSorter[numTypes];

    // Requesting more words than there are types used to index past the end
    // of sortedWords; clamp the request to what is actually available.
    int wordLimit = Math.min(numWords, sortedWords.length);

    for (int topic = 0; topic < numTopics; topic++) {
        // Refill the shared scratch array with this topic's per-type counts.
        for (int type = 0; type < numTypes; type++) {
            sortedWords[type] = new IDSorter(type, typeTopicCounts[type][topic]);
        }
        Arrays.sort(sortedWords);

        output.append(topic).append("\t").append(tokensPerTopic[topic]).append("\t");
        for (int i = 0; i < wordLimit; i++) {
            output.append(alphabet.lookupObject(sortedWords[i].getID())).append(" ");
        }
        output.append("\n");
    }

    return output.toString();
}
/**
 * Builds a plain-text report with one line per topic: the topic index, the
 * topic's token count, the formatted total topic weight and the topic's
 * leading words.
 *
 * <p>Each line is {@code topic TAB tokensPerTopic[topic] TAB weight} followed
 * directly by {@code word word ... NEWLINE}. Words are read from the front of
 * the array after {@code Arrays.sort}, so the ranking is whatever
 * {@code IDSorter}'s natural ordering defines (presumably highest weight
 * first -- confirm against {@code IDSorter}).
 *
 * @param numWords maximum number of words to list per topic; a value larger
 *                 than {@code numTypes} is clamped to {@code numTypes}
 * @return the report, one newline-terminated line per topic
 */
public String topWords (int numWords) {
    StringBuilder output = new StringBuilder();
    IDSorter[] sortedWords = new IDSorter[numTypes];

    // Requesting more words than there are types used to index past the end
    // of sortedWords; clamp the request to what is actually available.
    int wordLimit = Math.min(numWords, sortedWords.length);

    for (int topic = 0; topic < numTopics; topic++) {
        // Refill the shared scratch array with this topic's per-type counts.
        for (int type = 0; type < numTypes; type++) {
            sortedWords[type] = new IDSorter(type, typeTopicCounts[type][topic]);
        }
        Arrays.sort(sortedWords);

        // NOTE(review): no separator is written between the formatted weight and
        // the first word. Left unchanged so the output format stays the same;
        // confirm whether a tab was intended here.
        output.append(topic).append("\t")
              .append(tokensPerTopic[topic]).append("\t")
              .append(formatter.format(totalTopicWeights[topic]));
        for (int i = 0; i < wordLimit; i++) {
            output.append(alphabet.lookupObject(sortedWords[i].getID())).append(" ");
        }
        output.append("\n");
    }

    return output.toString();
}
/**
 * Builds a plain-text report with one line per topic: the topic index, the
 * topic's token count, the formatted total topic weight and the topic's
 * leading words.
 *
 * <p>Each line is {@code topic TAB tokensPerTopic[topic] TAB weight} followed
 * directly by {@code word word ... NEWLINE}. Words are read from the front of
 * the array after {@code Arrays.sort}, so the ranking is whatever
 * {@code IDSorter}'s natural ordering defines (presumably highest weight
 * first -- confirm against {@code IDSorter}).
 *
 * @param numWords maximum number of words to list per topic; a value larger
 *                 than {@code numTypes} is clamped to {@code numTypes}
 * @return the report, one newline-terminated line per topic
 */
public String topWords (int numWords) {
    StringBuilder output = new StringBuilder();
    IDSorter[] sortedWords = new IDSorter[numTypes];

    // Requesting more words than there are types used to index past the end
    // of sortedWords; clamp the request to what is actually available.
    int wordLimit = Math.min(numWords, sortedWords.length);

    for (int topic = 0; topic < numTopics; topic++) {
        // Refill the shared scratch array with this topic's per-type counts.
        for (int type = 0; type < numTypes; type++) {
            sortedWords[type] = new IDSorter(type, typeTopicCounts[type][topic]);
        }
        Arrays.sort(sortedWords);

        // NOTE(review): no separator is written between the formatted weight and
        // the first word. Left unchanged so the output format stays the same;
        // confirm whether a tab was intended here.
        output.append(topic).append("\t")
              .append(tokensPerTopic[topic]).append("\t")
              .append(formatter.format(totalTopicWeights[topic]));
        for (int i = 0; i < wordLimit; i++) {
            output.append(alphabet.lookupObject(sortedWords[i].getID())).append(" ");
        }
        output.append("\n");
    }

    return output.toString();
}
/**
 * Builds a plain-text report with one line per topic: the topic index, the
 * topic's token count, the formatted total topic weight and the topic's
 * leading words.
 *
 * <p>Each line is {@code topic TAB tokensPerTopic[topic] TAB weight} followed
 * directly by {@code word word ... NEWLINE}. Words are read from the front of
 * the array after {@code Arrays.sort}, so the ranking is whatever
 * {@code IDSorter}'s natural ordering defines (presumably highest weight
 * first -- confirm against {@code IDSorter}).
 *
 * @param numWords maximum number of words to list per topic; a value larger
 *                 than {@code numTypes} is clamped to {@code numTypes}
 * @return the report, one newline-terminated line per topic
 */
public String topWords (int numWords) {
    StringBuilder output = new StringBuilder();
    IDSorter[] sortedWords = new IDSorter[numTypes];

    // Requesting more words than there are types used to index past the end
    // of sortedWords; clamp the request to what is actually available.
    int wordLimit = Math.min(numWords, sortedWords.length);

    for (int topic = 0; topic < numTopics; topic++) {
        // Refill the shared scratch array with this topic's per-type counts.
        for (int type = 0; type < numTypes; type++) {
            sortedWords[type] = new IDSorter(type, typeTopicCounts[type][topic]);
        }
        Arrays.sort(sortedWords);

        // NOTE(review): no separator is written between the formatted weight and
        // the first word. Left unchanged so the output format stays the same;
        // confirm whether a tab was intended here.
        output.append(topic).append("\t")
              .append(tokensPerTopic[topic]).append("\t")
              .append(formatter.format(totalTopicWeights[topic]));
        for (int i = 0; i < wordLimit; i++) {
            output.append(alphabet.lookupObject(sortedWords[i].getID())).append(" ");
        }
        output.append("\n");
    }

    return output.toString();
}
/**
 * Writes an XML report of the topics to {@code out}: a {@code <topicModel>}
 * root containing one {@code <topic>} element per topic, each holding up to
 * {@code numWords} {@code <word>} children.
 *
 * <p>Each {@code <topic>} carries the topic index, {@code alpha[topic]} and
 * {@code tokensPerTopic[topic]} as attributes. Each {@code <word>} carries its
 * 1-based rank and its weight as attributes and the word text as content.
 * Words are emitted in the iteration order of the sets returned by
 * {@code getSortedWords()}.
 *
 * <p>The word text is escaped ({@code &}, {@code <}, {@code >}) so that words
 * containing markup characters do not produce malformed XML.
 *
 * @param out      destination writer; it is neither flushed nor closed here
 * @param numWords maximum number of words to write per topic
 */
public void topicXMLReport (PrintWriter out, int numWords) {
    ArrayList<TreeSet<IDSorter>> topicSortedWords = getSortedWords();

    out.println("<?xml version='1.0' ?>");
    out.println("<topicModel>");

    for (int topic = 0; topic < numTopics; topic++) {
        out.println(" <topic id='" + topic + "' alpha='" + alpha[topic] +
                    "' totalTokens='" + tokensPerTopic[topic] + "'>");

        int rank = 1;
        Iterator<IDSorter> iterator = topicSortedWords.get(topic).iterator();
        while (iterator.hasNext() && rank <= numWords) {
            IDSorter info = iterator.next();

            // Escape '&' first so the entities produced for '<' and '>' are
            // not escaped a second time.
            String word = String.valueOf(alphabet.lookupObject(info.getID()))
                    .replace("&", "&amp;")
                    .replace("<", "&lt;")
                    .replace(">", "&gt;");

            out.println(" <word rank='" + rank + "' count='" + info.getWeight() + "'>" +
                        word + "</word>");
            rank++;
        }

        out.println(" </topic>");
    }

    out.println("</topicModel>");
}
/**
 * Returns the leading words by {@code typeCounts}, separated by single spaces
 * (a trailing space follows the last word).
 *
 * <p>Words are read from the front of the array after {@code Arrays.sort}, so
 * the ranking is whatever {@code IDSorter}'s natural ordering defines
 * (presumably highest weight first -- confirm against {@code IDSorter}).
 *
 * @param numWords   maximum number of words to return; a value larger than
 *                   {@code numTypes} is clamped to {@code numTypes}
 * @param withWeight if {@code true}, each word is written as {@code word:weight};
 *                   otherwise only the word is written
 * @return the space-separated words, or an empty string if none are listed
 */
public String getTopWords(int numWords, boolean withWeight) {
    IDSorter[] sortedTypes = new IDSorter[numTypes];
    for (int type = 0; type < numTypes; type++) {
        sortedTypes[type] = new IDSorter(type, typeCounts[type]);
    }
    Arrays.sort(sortedTypes);

    Alphabet alphabet = instances.getDataAlphabet();

    // Requesting more words than there are types used to index past the end
    // of sortedTypes; clamp the request to what is actually available.
    int wordLimit = Math.min(numWords, sortedTypes.length);

    // The buffer never leaves this method, so the unsynchronized builder suffices.
    StringBuilder out = new StringBuilder();
    for (int i = 0; i < wordLimit; i++) {
        out.append(alphabet.lookupObject(sortedTypes[i].getID()));
        if (withWeight) {
            out.append(":").append(sortedTypes[i].getWeight());
        }
        out.append(" ");
    }
    return out.toString();
}
/**
 * Returns the leading words by {@code typeCounts}, separated by single spaces
 * (a trailing space follows the last word).
 *
 * <p>Words are read from the front of the array after {@code Arrays.sort}, so
 * the ranking is whatever {@code IDSorter}'s natural ordering defines
 * (presumably highest weight first -- confirm against {@code IDSorter}).
 *
 * @param numWords   maximum number of words to return; a value larger than
 *                   {@code numTypes} is clamped to {@code numTypes}
 * @param withWeight if {@code true}, each word is written as {@code word:weight};
 *                   otherwise only the word is written
 * @return the space-separated words, or an empty string if none are listed
 */
public String getTopWords(int numWords, boolean withWeight) {
    IDSorter[] sortedTypes = new IDSorter[numTypes];
    for (int type = 0; type < numTypes; type++) {
        sortedTypes[type] = new IDSorter(type, typeCounts[type]);
    }
    Arrays.sort(sortedTypes);

    Alphabet alphabet = instances.getDataAlphabet();

    // Requesting more words than there are types used to index past the end
    // of sortedTypes; clamp the request to what is actually available.
    int wordLimit = Math.min(numWords, sortedTypes.length);

    // The buffer never leaves this method, so the unsynchronized builder suffices.
    StringBuilder out = new StringBuilder();
    for (int i = 0; i < wordLimit; i++) {
        out.append(alphabet.lookupObject(sortedTypes[i].getID()));
        if (withWeight) {
            out.append(":").append(sortedTypes[i].getWeight());
        }
        out.append(" ");
    }
    return out.toString();
}
/**
 * Returns the leading words by {@code typeCounts}, separated by single spaces
 * (a trailing space follows the last word).
 *
 * <p>Words are read from the front of the array after {@code Arrays.sort}, so
 * the ranking is whatever {@code IDSorter}'s natural ordering defines
 * (presumably highest weight first -- confirm against {@code IDSorter}).
 *
 * @param numWords   maximum number of words to return; a value larger than
 *                   {@code numTypes} is clamped to {@code numTypes}
 * @param withWeight if {@code true}, each word is written as {@code word:weight};
 *                   otherwise only the word is written
 * @return the space-separated words, or an empty string if none are listed
 */
public String getTopWords(int numWords, boolean withWeight) {
    IDSorter[] sortedTypes = new IDSorter[numTypes];
    for (int type = 0; type < numTypes; type++) {
        sortedTypes[type] = new IDSorter(type, typeCounts[type]);
    }
    Arrays.sort(sortedTypes);

    Alphabet alphabet = instances.getDataAlphabet();

    // Requesting more words than there are types used to index past the end
    // of sortedTypes; clamp the request to what is actually available.
    int wordLimit = Math.min(numWords, sortedTypes.length);

    // The buffer never leaves this method, so the unsynchronized builder suffices.
    StringBuilder out = new StringBuilder();
    for (int i = 0; i < wordLimit; i++) {
        out.append(alphabet.lookupObject(sortedTypes[i].getID()));
        if (withWeight) {
            out.append(":").append(sortedTypes[i].getWeight());
        }
        out.append(" ");
    }
    return out.toString();
}
/**
 * Builds a plain-text report with one line per non-empty topic: the topic
 * index, the topic's label, its token count and its leading words.
 *
 * <p>Each line is {@code topic TAB label TAB tokensPerTopic[topic] TAB word word ... NEWLINE}.
 * Topics whose {@code tokensPerTopic} entry is zero are skipped entirely.
 * Within a topic, listing stops early at the first word whose weight is zero.
 * Words are read from the front of the array after {@code Arrays.sort}, so the
 * ranking is whatever {@code IDSorter}'s natural ordering defines (presumably
 * highest weight first -- confirm against {@code IDSorter}).
 *
 * @param numWords maximum number of words to list per topic; a value larger
 *                 than {@code numTypes} is clamped to {@code numTypes}
 * @return the report, one newline-terminated line per non-empty topic
 */
public String topWords (int numWords) {
    StringBuilder output = new StringBuilder();
    IDSorter[] sortedWords = new IDSorter[numTypes];

    // Requesting more words than there are types used to index past the end
    // of sortedWords when every weight was non-zero; clamp the request.
    int wordLimit = Math.min(numWords, sortedWords.length);

    for (int topic = 0; topic < numTopics; topic++) {
        if (tokensPerTopic[topic] == 0) {
            continue;
        }

        // Refill the shared scratch array with this topic's per-type counts.
        for (int type = 0; type < numTypes; type++) {
            sortedWords[type] = new IDSorter(type, typeTopicCounts[type][topic]);
        }
        Arrays.sort(sortedWords);

        output.append(topic).append("\t")
              .append(labelAlphabet.lookupObject(topic)).append("\t")
              .append(tokensPerTopic[topic]).append("\t");

        for (int i = 0; i < wordLimit; i++) {
            if (sortedWords[i].getWeight() == 0) {
                break;
            }
            output.append(alphabet.lookupObject(sortedWords[i].getID())).append(" ");
        }
        output.append("\n");
    }

    return output.toString();
}
/**
 * Builds a plain-text report with one line per non-empty topic: the topic
 * index, the topic's label, its token count and its leading words.
 *
 * <p>Each line is {@code topic TAB label TAB tokensPerTopic[topic] TAB word word ... NEWLINE}.
 * Topics whose {@code tokensPerTopic} entry is zero are skipped entirely.
 * Within a topic, listing stops early at the first word whose weight is zero.
 * Words are read from the front of the array after {@code Arrays.sort}, so the
 * ranking is whatever {@code IDSorter}'s natural ordering defines (presumably
 * highest weight first -- confirm against {@code IDSorter}).
 *
 * @param numWords maximum number of words to list per topic; a value larger
 *                 than {@code numTypes} is clamped to {@code numTypes}
 * @return the report, one newline-terminated line per non-empty topic
 */
public String topWords (int numWords) {
    StringBuilder output = new StringBuilder();
    IDSorter[] sortedWords = new IDSorter[numTypes];

    // Requesting more words than there are types used to index past the end
    // of sortedWords when every weight was non-zero; clamp the request.
    int wordLimit = Math.min(numWords, sortedWords.length);

    for (int topic = 0; topic < numTopics; topic++) {
        if (tokensPerTopic[topic] == 0) {
            continue;
        }

        // Refill the shared scratch array with this topic's per-type counts.
        for (int type = 0; type < numTypes; type++) {
            sortedWords[type] = new IDSorter(type, typeTopicCounts[type][topic]);
        }
        Arrays.sort(sortedWords);

        output.append(topic).append("\t")
              .append(labelAlphabet.lookupObject(topic)).append("\t")
              .append(tokensPerTopic[topic]).append("\t");

        for (int i = 0; i < wordLimit; i++) {
            if (sortedWords[i].getWeight() == 0) {
                break;
            }
            output.append(alphabet.lookupObject(sortedWords[i].getID())).append(" ");
        }
        output.append("\n");
    }

    return output.toString();
}
/**
 * Builds a plain-text report with one line per non-empty topic: the topic
 * index, the topic's label, its token count and its leading words.
 *
 * <p>Each line is {@code topic TAB label TAB tokensPerTopic[topic] TAB word word ... NEWLINE}.
 * Topics whose {@code tokensPerTopic} entry is zero are skipped entirely.
 * Within a topic, listing stops early at the first word whose weight is zero.
 * Words are read from the front of the array after {@code Arrays.sort}, so the
 * ranking is whatever {@code IDSorter}'s natural ordering defines (presumably
 * highest weight first -- confirm against {@code IDSorter}).
 *
 * @param numWords maximum number of words to list per topic; a value larger
 *                 than {@code numTypes} is clamped to {@code numTypes}
 * @return the report, one newline-terminated line per non-empty topic
 */
public String topWords (int numWords) {
    StringBuilder output = new StringBuilder();
    IDSorter[] sortedWords = new IDSorter[numTypes];

    // Requesting more words than there are types used to index past the end
    // of sortedWords when every weight was non-zero; clamp the request.
    int wordLimit = Math.min(numWords, sortedWords.length);

    for (int topic = 0; topic < numTopics; topic++) {
        if (tokensPerTopic[topic] == 0) {
            continue;
        }

        // Refill the shared scratch array with this topic's per-type counts.
        for (int type = 0; type < numTypes; type++) {
            sortedWords[type] = new IDSorter(type, typeTopicCounts[type][topic]);
        }
        Arrays.sort(sortedWords);

        output.append(topic).append("\t")
              .append(labelAlphabet.lookupObject(topic)).append("\t")
              .append(tokensPerTopic[topic]).append("\t");

        for (int i = 0; i < wordLimit; i++) {
            if (sortedWords[i].getWeight() == 0) {
                break;
            }
            output.append(alphabet.lookupObject(sortedWords[i].getID())).append(" ");
        }
        output.append("\n");
    }

    return output.toString();
}
/**
 * Builds a plain-text report with one line per topic key found in
 * {@code docsPerTopic}: the topic id, the topic's token count and its leading
 * words.
 *
 * <p>Each line is {@code topic TAB tokensPerTopic.get(topic) TAB word word ... NEWLINE}.
 * Within a topic, listing stops early at the first word whose weight is below
 * 1.0. Words are read from the front of the array after {@code Arrays.sort},
 * so the ranking is whatever {@code IDSorter}'s natural ordering defines
 * (presumably highest weight first -- confirm against {@code IDSorter}).
 * Topics appear in the order returned by {@code docsPerTopic.keys()}.
 *
 * @param numWords maximum number of words to list per topic; a value larger
 *                 than {@code numTypes} is clamped to {@code numTypes}
 * @return the report, one newline-terminated line per topic key
 */
public String topWords (int numWords) {
    StringBuilder output = new StringBuilder();
    IDSorter[] sortedWords = new IDSorter[numTypes];

    // Requesting more words than there are types used to index past the end
    // of sortedWords when every weight was at least 1.0; clamp the request.
    int wordLimit = Math.min(numWords, sortedWords.length);

    for (int topic: docsPerTopic.keys()) {
        // Refill the shared scratch array with this topic's per-type counts.
        for (int type = 0; type < numTypes; type++) {
            sortedWords[type] = new IDSorter(type, typeTopicCounts[type].get(topic));
        }
        Arrays.sort(sortedWords);

        output.append(topic).append("\t").append(tokensPerTopic.get(topic)).append("\t");

        for (int i = 0; i < wordLimit; i++) {
            if (sortedWords[i].getWeight() < 1.0) {
                break;
            }
            output.append(alphabet.lookupObject(sortedWords[i].getID())).append(" ");
        }
        output.append("\n");
    }

    return output.toString();
}
/**
 * Builds a plain-text report with one line per topic key found in
 * {@code docsPerTopic}: the topic id, the topic's token count and its leading
 * words.
 *
 * <p>Each line is {@code topic TAB tokensPerTopic.get(topic) TAB word word ... NEWLINE}.
 * Within a topic, listing stops early at the first word whose weight is below
 * 1.0. Words are read from the front of the array after {@code Arrays.sort},
 * so the ranking is whatever {@code IDSorter}'s natural ordering defines
 * (presumably highest weight first -- confirm against {@code IDSorter}).
 * Topics appear in the order returned by {@code docsPerTopic.keys()}.
 *
 * @param numWords maximum number of words to list per topic; a value larger
 *                 than {@code numTypes} is clamped to {@code numTypes}
 * @return the report, one newline-terminated line per topic key
 */
public String topWords (int numWords) {
    StringBuilder output = new StringBuilder();
    IDSorter[] sortedWords = new IDSorter[numTypes];

    // Requesting more words than there are types used to index past the end
    // of sortedWords when every weight was at least 1.0; clamp the request.
    int wordLimit = Math.min(numWords, sortedWords.length);

    for (int topic: docsPerTopic.keys()) {
        // Refill the shared scratch array with this topic's per-type counts.
        for (int type = 0; type < numTypes; type++) {
            sortedWords[type] = new IDSorter(type, typeTopicCounts[type].get(topic));
        }
        Arrays.sort(sortedWords);

        output.append(topic).append("\t").append(tokensPerTopic.get(topic)).append("\t");

        for (int i = 0; i < wordLimit; i++) {
            if (sortedWords[i].getWeight() < 1.0) {
                break;
            }
            output.append(alphabet.lookupObject(sortedWords[i].getID())).append(" ");
        }
        output.append("\n");
    }

    return output.toString();
}
/**
 * Builds a plain-text report with one line per topic key found in
 * {@code docsPerTopic}: the topic id, the topic's token count and its leading
 * words.
 *
 * <p>Each line is {@code topic TAB tokensPerTopic.get(topic) TAB word word ... NEWLINE}.
 * Within a topic, listing stops early at the first word whose weight is below
 * 1.0. Words are read from the front of the array after {@code Arrays.sort},
 * so the ranking is whatever {@code IDSorter}'s natural ordering defines
 * (presumably highest weight first -- confirm against {@code IDSorter}).
 * Topics appear in the order returned by {@code docsPerTopic.keys()}.
 *
 * @param numWords maximum number of words to list per topic; a value larger
 *                 than {@code numTypes} is clamped to {@code numTypes}
 * @return the report, one newline-terminated line per topic key
 */
public String topWords (int numWords) {
    StringBuilder output = new StringBuilder();
    IDSorter[] sortedWords = new IDSorter[numTypes];

    // Requesting more words than there are types used to index past the end
    // of sortedWords when every weight was at least 1.0; clamp the request.
    int wordLimit = Math.min(numWords, sortedWords.length);

    for (int topic: docsPerTopic.keys()) {
        // Refill the shared scratch array with this topic's per-type counts.
        for (int type = 0; type < numTypes; type++) {
            sortedWords[type] = new IDSorter(type, typeTopicCounts[type].get(topic));
        }
        Arrays.sort(sortedWords);

        output.append(topic).append("\t").append(tokensPerTopic.get(topic)).append("\t");

        for (int i = 0; i < wordLimit; i++) {
            if (sortedWords[i].getWeight() < 1.0) {
                break;
            }
            output.append(alphabet.lookupObject(sortedWords[i].getID())).append(" ");
        }
        output.append("\n");
    }

    return output.toString();
}
/**
 * Scores each topic by the inverse of the sum of squared word probabilities,
 * where a word's probability is its weight divided by the topic's token count
 * ({@code model.tokensPerTopic[topic]}).
 *
 * <p>The scores are stored in a {@code TopicScores} created with the name
 * {@code "eff_num_words"}.
 *
 * @return one score per topic, indexed by topic
 */
public TopicScores getEffectiveNumberOfWords() {
    int[] tokensPerTopic = model.tokensPerTopic;
    TopicScores scores = new TopicScores("eff_num_words", numTopics, numTopWords);

    for (int topic = 0; topic < numTopics; topic++) {
        // Loop-invariant for the inner loop, so read it once per topic.
        int topicTokens = tokensPerTopic[topic];
        TreeSet<IDSorter> sortedWords = topicSortedWords.get(topic);

        double sumSquaredProbabilities = 0.0;
        for (IDSorter info: sortedWords) {
            double probability = info.getWeight() / topicTokens;
            sumSquaredProbabilities += probability * probability;
        }

        // NOTE(review): there is no guard for a topic with zero tokens or an
        // empty word set; the division is left exactly as before. Confirm
        // what score such a topic should receive.
        scores.setTopicScore(topic, 1.0 / sumSquaredProbabilities);
    }

    return scores;
}
/**
 * Scores each topic by the inverse of the sum of squared word probabilities,
 * where a word's probability is its weight divided by the topic's token count
 * ({@code model.tokensPerTopic[topic]}).
 *
 * <p>The scores are stored in a {@code TopicScores} created with the name
 * {@code "eff_num_words"}.
 *
 * @return one score per topic, indexed by topic
 */
public TopicScores getEffectiveNumberOfWords() {
    int[] tokensPerTopic = model.tokensPerTopic;
    TopicScores scores = new TopicScores("eff_num_words", numTopics, numTopWords);

    for (int topic = 0; topic < numTopics; topic++) {
        // Loop-invariant for the inner loop, so read it once per topic.
        int topicTokens = tokensPerTopic[topic];
        TreeSet<IDSorter> sortedWords = topicSortedWords.get(topic);

        double sumSquaredProbabilities = 0.0;
        for (IDSorter info: sortedWords) {
            double probability = info.getWeight() / topicTokens;
            sumSquaredProbabilities += probability * probability;
        }

        // NOTE(review): there is no guard for a topic with zero tokens or an
        // empty word set; the division is left exactly as before. Confirm
        // what score such a topic should receive.
        scores.setTopicScore(topic, 1.0 / sumSquaredProbabilities);
    }

    return scores;
}
/**
 * Scores each topic by the inverse of the sum of squared word probabilities,
 * where a word's probability is its weight divided by the topic's token count
 * ({@code model.tokensPerTopic[topic]}).
 *
 * <p>The scores are stored in a {@code TopicScores} created with the name
 * {@code "eff_num_words"}.
 *
 * @return one score per topic, indexed by topic
 */
public TopicScores getEffectiveNumberOfWords() {
    int[] tokensPerTopic = model.tokensPerTopic;
    TopicScores scores = new TopicScores("eff_num_words", numTopics, numTopWords);

    for (int topic = 0; topic < numTopics; topic++) {
        // Loop-invariant for the inner loop, so read it once per topic.
        int topicTokens = tokensPerTopic[topic];
        TreeSet<IDSorter> sortedWords = topicSortedWords.get(topic);

        double sumSquaredProbabilities = 0.0;
        for (IDSorter info: sortedWords) {
            double probability = info.getWeight() / topicTokens;
            sumSquaredProbabilities += probability * probability;
        }

        // NOTE(review): there is no guard for a topic with zero tokens or an
        // empty word set; the division is left exactly as before. Confirm
        // what score such a topic should receive.
        scores.setTopicScore(topic, 1.0 / sumSquaredProbabilities);
    }

    return scores;
}