/**
 * Builds an "allocation_count" score set in which each topic's score is
 * {@code numDocumentsAtProportions[topic][i] / numNonZeroDocuments[topic]}.
 *
 * @param i column index into each row of {@code numDocumentsAtProportions};
 *          valid values are 0 .. {@code numDocumentsAtProportions[0].length - 1}
 * @return the populated scores, or a freshly constructed (unfilled) score
 *         object when {@code i} is out of range (a message is written to stderr)
 */
public TopicScores getDocumentPercent(int i) {
    TopicScores scores = new TopicScores("allocation_count", numTopics, numTopWords);

    // Reject negative indices and i == length as well: the previous check
    // (i > length) let i == length through and then failed with an
    // ArrayIndexOutOfBoundsException in the loop below.
    if (i < 0 || i >= numDocumentsAtProportions[0].length) {
        System.err.println("Invalid proportion indices (max " + (numDocumentsAtProportions[0].length - 1) + ") : " + i);
        return scores;
    }

    for (int topic = 0; topic < numTopics; topic++) {
        // The cast forces floating-point division.
        scores.setTopicScore(topic, (double) numDocumentsAtProportions[topic][i] / numNonZeroDocuments[topic]);
    }

    return scores;
}
/**
 * Builds an "allocation_count" score set in which each topic's score is
 * {@code numDocumentsAtProportions[topic][i] / numNonZeroDocuments[topic]}.
 *
 * @param i column index into each row of {@code numDocumentsAtProportions};
 *          valid values are 0 .. {@code numDocumentsAtProportions[0].length - 1}
 * @return the populated scores, or a freshly constructed (unfilled) score
 *         object when {@code i} is out of range (a message is written to stderr)
 */
public TopicScores getDocumentPercent(int i) {
    TopicScores scores = new TopicScores("allocation_count", numTopics, numTopWords);

    // Reject negative indices and i == length as well: the previous check
    // (i > length) let i == length through and then failed with an
    // ArrayIndexOutOfBoundsException in the loop below.
    if (i < 0 || i >= numDocumentsAtProportions[0].length) {
        System.err.println("Invalid proportion indices (max " + (numDocumentsAtProportions[0].length - 1) + ") : " + i);
        return scores;
    }

    for (int topic = 0; topic < numTopics; topic++) {
        // The cast forces floating-point division.
        scores.setTopicScore(topic, (double) numDocumentsAtProportions[topic][i] / numNonZeroDocuments[topic]);
    }

    return scores;
}
/**
 * Builds an "allocation_count" score set in which each topic's score is
 * {@code numDocumentsAtProportions[topic][i] / numNonZeroDocuments[topic]}.
 *
 * @param i column index into each row of {@code numDocumentsAtProportions};
 *          valid values are 0 .. {@code numDocumentsAtProportions[0].length - 1}
 * @return the populated scores, or a freshly constructed (unfilled) score
 *         object when {@code i} is out of range (a message is written to stderr)
 */
public TopicScores getDocumentPercent(int i) {
    TopicScores scores = new TopicScores("allocation_count", numTopics, numTopWords);

    // Reject negative indices and i == length as well: the previous check
    // (i > length) let i == length through and then failed with an
    // ArrayIndexOutOfBoundsException in the loop below.
    if (i < 0 || i >= numDocumentsAtProportions[0].length) {
        System.err.println("Invalid proportion indices (max " + (numDocumentsAtProportions[0].length - 1) + ") : " + i);
        return scores;
    }

    for (int topic = 0; topic < numTopics; topic++) {
        // The cast forces floating-point division.
        scores.setTopicScore(topic, (double) numDocumentsAtProportions[topic][i] / numNonZeroDocuments[topic]);
    }

    return scores;
}
/**
 * Builds an "eff_num_words" score set. For each topic, every entry of
 * {@code topicSortedWords.get(topic)} contributes
 * {@code (weight / tokensPerTopic[topic])^2} to a running sum, and the topic
 * score is the reciprocal of that sum.
 *
 * The per-topic token totals are read from {@code model.tokensPerTopic}.
 *
 * @return the populated scores, one value per topic
 */
public TopicScores getEffectiveNumberOfWords() {
    int[] tokensPerTopic = model.tokensPerTopic;

    TopicScores scores = new TopicScores("eff_num_words", numTopics, numTopWords);

    for (int topic = 0; topic < numTopics; topic++) {
        double sumSquaredProbabilities = 0.0;
        TreeSet<IDSorter> sortedWords = topicSortedWords.get(topic);

        for (IDSorter info : sortedWords) {
            // NOTE(review): presumably getWeight() is the word's count in this
            // topic, making this its within-topic probability -- confirm.
            double probability = info.getWeight() / tokensPerTopic[topic];
            sumSquaredProbabilities += probability * probability;
        }

        // Reciprocal of the summed squared probabilities.
        scores.setTopicScore(topic, 1.0 / sumSquaredProbabilities);
    }

    return scores;
}
/**
 * Low-quality topics often have lots of unusually short words.
 *
 * Builds a "word-length" score set: each top word's score is its length in
 * characters, and each topic's score is the mean length of the words that
 * are actually present for that topic.
 *
 * @return the populated scores, with per-word scores marked as defined
 */
public TopicScores getWordLengthScores() {
    TopicScores scores = new TopicScores("word-length", numTopics, numTopWords);
    scores.wordScoresDefined = true;

    for (int topic = 0; topic < numTopics; topic++) {
        int total = 0;
        int wordCount = 0;

        for (int position = 0; position < topicTopWords[topic].length; position++) {
            // A null slot marks the end of the words available for this topic.
            if (topicTopWords[topic][position] == null) {
                break;
            }

            int length = topicTopWords[topic][position].length();
            total += length;
            wordCount++;

            scores.setTopicWordScore(topic, position, length);
        }

        // Average over the words actually counted rather than the array
        // capacity; dividing by the capacity understated the mean whenever
        // the loop stopped early at a null slot. No words yields 0.0.
        scores.setTopicScore(topic, wordCount == 0 ? 0.0 : (double) total / wordCount);
    }

    return scores;
}
/**
 * Low-quality topics often have lots of unusually short words.
 *
 * Builds a "word-length" score set: each top word's score is its length in
 * characters, and each topic's score is the mean length of the words that
 * are actually present for that topic.
 *
 * @return the populated scores, with per-word scores marked as defined
 */
public TopicScores getWordLengthScores() {
    TopicScores scores = new TopicScores("word-length", numTopics, numTopWords);
    scores.wordScoresDefined = true;

    for (int topic = 0; topic < numTopics; topic++) {
        int total = 0;
        int wordCount = 0;

        for (int position = 0; position < topicTopWords[topic].length; position++) {
            // A null slot marks the end of the words available for this topic.
            if (topicTopWords[topic][position] == null) {
                break;
            }

            int length = topicTopWords[topic][position].length();
            total += length;
            wordCount++;

            scores.setTopicWordScore(topic, position, length);
        }

        // Average over the words actually counted rather than the array
        // capacity; dividing by the capacity understated the mean whenever
        // the loop stopped early at a null slot. No words yields 0.0.
        scores.setTopicScore(topic, wordCount == 0 ? 0.0 : (double) total / wordCount);
    }

    return scores;
}
/**
 * Builds an "eff_num_words" score set. For each topic, every entry of
 * {@code topicSortedWords.get(topic)} contributes
 * {@code (weight / tokensPerTopic[topic])^2} to a running sum, and the topic
 * score is the reciprocal of that sum.
 *
 * The per-topic token totals are read from {@code model.tokensPerTopic}.
 *
 * @return the populated scores, one value per topic
 */
public TopicScores getEffectiveNumberOfWords() {
    int[] tokensPerTopic = model.tokensPerTopic;

    TopicScores scores = new TopicScores("eff_num_words", numTopics, numTopWords);

    for (int topic = 0; topic < numTopics; topic++) {
        double sumSquaredProbabilities = 0.0;
        TreeSet<IDSorter> sortedWords = topicSortedWords.get(topic);

        for (IDSorter info : sortedWords) {
            // NOTE(review): presumably getWeight() is the word's count in this
            // topic, making this its within-topic probability -- confirm.
            double probability = info.getWeight() / tokensPerTopic[topic];
            sumSquaredProbabilities += probability * probability;
        }

        // Reciprocal of the summed squared probabilities.
        scores.setTopicScore(topic, 1.0 / sumSquaredProbabilities);
    }

    return scores;
}
/**
 * Builds an "eff_num_words" score set. For each topic, every entry of
 * {@code topicSortedWords.get(topic)} contributes
 * {@code (weight / tokensPerTopic[topic])^2} to a running sum, and the topic
 * score is the reciprocal of that sum.
 *
 * The per-topic token totals are read from {@code model.tokensPerTopic}.
 *
 * @return the populated scores, one value per topic
 */
public TopicScores getEffectiveNumberOfWords() {
    int[] tokensPerTopic = model.tokensPerTopic;

    TopicScores scores = new TopicScores("eff_num_words", numTopics, numTopWords);

    for (int topic = 0; topic < numTopics; topic++) {
        double sumSquaredProbabilities = 0.0;
        TreeSet<IDSorter> sortedWords = topicSortedWords.get(topic);

        for (IDSorter info : sortedWords) {
            // NOTE(review): presumably getWeight() is the word's count in this
            // topic, making this its within-topic probability -- confirm.
            double probability = info.getWeight() / tokensPerTopic[topic];
            sumSquaredProbabilities += probability * probability;
        }

        // Reciprocal of the summed squared probabilities.
        scores.setTopicScore(topic, 1.0 / sumSquaredProbabilities);
    }

    return scores;
}
/**
 * Builds an "allocation_ratio" score set in which each topic's score is
 * {@code numDocumentsAtProportions[topic][numeratorIndex]} divided by
 * {@code numDocumentsAtProportions[topic][denominatorIndex]}.
 *
 * @param numeratorIndex   column index of the numerator count
 * @param denominatorIndex column index of the denominator count
 * @return the populated scores, or a freshly constructed (unfilled) score
 *         object when either index falls outside
 *         0 .. {@code numDocumentsAtProportions[0].length - 1}
 *         (a message is written to stderr)
 */
public TopicScores getDocumentPercentRatio(int numeratorIndex, int denominatorIndex) {
    TopicScores scores = new TopicScores("allocation_ratio", numTopics, numTopWords);

    int limit = numDocumentsAtProportions[0].length;

    // Reject negative indices and index == length as well: the previous check
    // (index > length) let index == length through and then failed with an
    // ArrayIndexOutOfBoundsException in the loop below.
    if (numeratorIndex < 0 || numeratorIndex >= limit
            || denominatorIndex < 0 || denominatorIndex >= limit) {
        System.err.println("Invalid proportion indices (max " + (numDocumentsAtProportions[0].length - 1) + ") : " + numeratorIndex + ", " + denominatorIndex);
        return scores;
    }

    for (int topic = 0; topic < numTopics; topic++) {
        // The cast forces floating-point division.
        scores.setTopicScore(topic, (double) numDocumentsAtProportions[topic][numeratorIndex] / numDocumentsAtProportions[topic][denominatorIndex]);
    }

    return scores;
}
/**
 * Builds an "allocation_ratio" score set in which each topic's score is
 * {@code numDocumentsAtProportions[topic][numeratorIndex]} divided by
 * {@code numDocumentsAtProportions[topic][denominatorIndex]}.
 *
 * @param numeratorIndex   column index of the numerator count
 * @param denominatorIndex column index of the denominator count
 * @return the populated scores, or a freshly constructed (unfilled) score
 *         object when either index falls outside
 *         0 .. {@code numDocumentsAtProportions[0].length - 1}
 *         (a message is written to stderr)
 */
public TopicScores getDocumentPercentRatio(int numeratorIndex, int denominatorIndex) {
    TopicScores scores = new TopicScores("allocation_ratio", numTopics, numTopWords);

    int limit = numDocumentsAtProportions[0].length;

    // Reject negative indices and index == length as well: the previous check
    // (index > length) let index == length through and then failed with an
    // ArrayIndexOutOfBoundsException in the loop below.
    if (numeratorIndex < 0 || numeratorIndex >= limit
            || denominatorIndex < 0 || denominatorIndex >= limit) {
        System.err.println("Invalid proportion indices (max " + (numDocumentsAtProportions[0].length - 1) + ") : " + numeratorIndex + ", " + denominatorIndex);
        return scores;
    }

    for (int topic = 0; topic < numTopics; topic++) {
        // The cast forces floating-point division.
        scores.setTopicScore(topic, (double) numDocumentsAtProportions[topic][numeratorIndex] / numDocumentsAtProportions[topic][denominatorIndex]);
    }

    return scores;
}
/**
 * Builds an "allocation_ratio" score set in which each topic's score is
 * {@code numDocumentsAtProportions[topic][numeratorIndex]} divided by
 * {@code numDocumentsAtProportions[topic][denominatorIndex]}.
 *
 * @param numeratorIndex   column index of the numerator count
 * @param denominatorIndex column index of the denominator count
 * @return the populated scores, or a freshly constructed (unfilled) score
 *         object when either index falls outside
 *         0 .. {@code numDocumentsAtProportions[0].length - 1}
 *         (a message is written to stderr)
 */
public TopicScores getDocumentPercentRatio(int numeratorIndex, int denominatorIndex) {
    TopicScores scores = new TopicScores("allocation_ratio", numTopics, numTopWords);

    int limit = numDocumentsAtProportions[0].length;

    // Reject negative indices and index == length as well: the previous check
    // (index > length) let index == length through and then failed with an
    // ArrayIndexOutOfBoundsException in the loop below.
    if (numeratorIndex < 0 || numeratorIndex >= limit
            || denominatorIndex < 0 || denominatorIndex >= limit) {
        System.err.println("Invalid proportion indices (max " + (numDocumentsAtProportions[0].length - 1) + ") : " + numeratorIndex + ", " + denominatorIndex);
        return scores;
    }

    for (int topic = 0; topic < numTopics; topic++) {
        // The cast forces floating-point division.
        scores.setTopicScore(topic, (double) numDocumentsAtProportions[topic][numeratorIndex] / numDocumentsAtProportions[topic][denominatorIndex]);
    }

    return scores;
}
/**
 * Builds a "document_entropy" score set. Each topic's score is
 * {@code -sumCountTimesLogCount[topic] / tokensPerTopic[topic]
 * + log(tokensPerTopic[topic])}.
 *
 * NOTE(review): presumably sumCountTimesLogCount holds the per-topic sum of
 * count * log(count) over documents, which would make this the entropy of
 * the topic's distribution over documents -- confirm where it is filled.
 *
 * @param tokensPerTopic per-topic totals; entries 0 .. numTopics-1 are read
 * @return the populated scores, one value per topic
 */
public TopicScores getDocumentEntropy(int[] tokensPerTopic) {
    final TopicScores result = new TopicScores("document_entropy", numTopics, numTopWords);

    int t = 0;
    while (t < numTopics) {
        double negatedMean = -sumCountTimesLogCount[t] / tokensPerTopic[t];
        double logTokens = Math.log(tokensPerTopic[t]);
        result.setTopicScore(t, negatedMean + logTokens);
        t++;
    }

    return result;
}
/**
 * Builds a "document_entropy" score set. Each topic's score is
 * {@code -sumCountTimesLogCount[topic] / tokensPerTopic[topic]
 * + log(tokensPerTopic[topic])}.
 *
 * NOTE(review): presumably sumCountTimesLogCount holds the per-topic sum of
 * count * log(count) over documents, which would make this the entropy of
 * the topic's distribution over documents -- confirm where it is filled.
 *
 * @param tokensPerTopic per-topic totals; entries 0 .. numTopics-1 are read
 * @return the populated scores, one value per topic
 */
public TopicScores getDocumentEntropy(int[] tokensPerTopic) {
    final TopicScores result = new TopicScores("document_entropy", numTopics, numTopWords);

    int t = 0;
    while (t < numTopics) {
        double negatedMean = -sumCountTimesLogCount[t] / tokensPerTopic[t];
        double logTokens = Math.log(tokensPerTopic[t]);
        result.setTopicScore(t, negatedMean + logTokens);
        t++;
    }

    return result;
}
/**
 * Wraps the supplied per-topic counts in a score set named "tokens".
 *
 * @param tokensPerTopic array indexed by topic; entries 0 .. numTopics-1 are read
 * @return scores whose value for each topic is {@code tokensPerTopic[topic]}
 */
public TopicScores getTokensPerTopic(int[] tokensPerTopic) {
    final TopicScores result = new TopicScores("tokens", numTopics, numTopWords);

    int t = 0;
    while (t < numTopics) {
        result.setTopicScore(t, tokensPerTopic[t]);
        t++;
    }

    return result;
}
/**
 * Builds a "rank_1_docs" score set in which each topic's score is
 * {@code numRank1Documents[topic] / numNonZeroDocuments[topic]}, computed
 * as a floating-point ratio.
 *
 * @return the populated scores, one value per topic
 */
public TopicScores getRank1Percent() {
    final TopicScores result = new TopicScores("rank_1_docs", numTopics, numTopWords);

    int t = 0;
    while (t < numTopics) {
        // Cast the numerator so the division is not truncated.
        double fraction = (double) numRank1Documents[t] / numNonZeroDocuments[t];
        result.setTopicScore(t, fraction);
        t++;
    }

    return result;
}
/**
 * Builds a "document_entropy" score set. Each topic's score is
 * {@code -sumCountTimesLogCount[topic] / tokensPerTopic[topic]
 * + log(tokensPerTopic[topic])}.
 *
 * NOTE(review): presumably sumCountTimesLogCount holds the per-topic sum of
 * count * log(count) over documents, which would make this the entropy of
 * the topic's distribution over documents -- confirm where it is filled.
 *
 * @param tokensPerTopic per-topic totals; entries 0 .. numTopics-1 are read
 * @return the populated scores, one value per topic
 */
public TopicScores getDocumentEntropy(int[] tokensPerTopic) {
    final TopicScores result = new TopicScores("document_entropy", numTopics, numTopWords);

    int t = 0;
    while (t < numTopics) {
        double negatedMean = -sumCountTimesLogCount[t] / tokensPerTopic[t];
        double logTokens = Math.log(tokensPerTopic[t]);
        result.setTopicScore(t, negatedMean + logTokens);
        t++;
    }

    return result;
}
/**
 * Builds a "rank_1_docs" score set in which each topic's score is
 * {@code numRank1Documents[topic] / numNonZeroDocuments[topic]}, computed
 * as a floating-point ratio.
 *
 * @return the populated scores, one value per topic
 */
public TopicScores getRank1Percent() {
    final TopicScores result = new TopicScores("rank_1_docs", numTopics, numTopWords);

    int t = 0;
    while (t < numTopics) {
        // Cast the numerator so the division is not truncated.
        double fraction = (double) numRank1Documents[t] / numNonZeroDocuments[t];
        result.setTopicScore(t, fraction);
        t++;
    }

    return result;
}
/**
 * Wraps the supplied per-topic counts in a score set named "tokens".
 *
 * @param tokensPerTopic array indexed by topic; entries 0 .. numTopics-1 are read
 * @return scores whose value for each topic is {@code tokensPerTopic[topic]}
 */
public TopicScores getTokensPerTopic(int[] tokensPerTopic) {
    final TopicScores result = new TopicScores("tokens", numTopics, numTopWords);

    int t = 0;
    while (t < numTopics) {
        result.setTopicScore(t, tokensPerTopic[t]);
        t++;
    }

    return result;
}
/**
 * Builds a "rank_1_docs" score set in which each topic's score is
 * {@code numRank1Documents[topic] / numNonZeroDocuments[topic]}, computed
 * as a floating-point ratio.
 *
 * @return the populated scores, one value per topic
 */
public TopicScores getRank1Percent() {
    final TopicScores result = new TopicScores("rank_1_docs", numTopics, numTopWords);

    int t = 0;
    while (t < numTopics) {
        // Cast the numerator so the division is not truncated.
        double fraction = (double) numRank1Documents[t] / numNonZeroDocuments[t];
        result.setTopicScore(t, fraction);
        t++;
    }

    return result;
}
/**
 * Wraps the supplied per-topic counts in a score set named "tokens".
 *
 * @param tokensPerTopic array indexed by topic; entries 0 .. numTopics-1 are read
 * @return scores whose value for each topic is {@code tokensPerTopic[topic]}
 */
public TopicScores getTokensPerTopic(int[] tokensPerTopic) {
    final TopicScores result = new TopicScores("tokens", numTopics, numTopWords);

    int t = 0;
    while (t < numTopics) {
        result.setTopicScore(t, tokensPerTopic[t]);
        t++;
    }

    return result;
}