// NOTE(review): the eight lines below are garbled, truncated fragments —
// unbalanced braces, loop bodies with no enclosing loop — apparently
// extraction residue from three diagnostics: getTokenDocumentDiscrepancies()
// ("token-doc-diff"), a word-length scorer ("word-length-sd"), and an
// exclusivity scorer ("exclusivity"). Each fragment also appears more than
// once. Recover the complete method bodies from the original source before
// compiling; do not edit these fragments in place. TODO confirm against
// upstream MALLET TopicModelDiagnostics.
public TopicScores getTokenDocumentDiscrepancies() { TopicScores scores = new TopicScores("token-doc-diff", numTopics, numTopWords); scores.wordScoresDefined = true; scores.setTopicWordScore(topic, position, score); topicScore += score; scores.setTopicScore(topic, topicScore);
TopicScores scores = new TopicScores("word-length-sd", numTopics, numTopWords); scores.wordScoresDefined = true; scores.addToTopicScore(topic, (length - meanLength) / lengthSD); scores.setTopicWordScore(topic, position, (length - meanLength) / lengthSD);
TopicScores scores = new TopicScores("exclusivity", numTopics, numTopWords); scores.wordScoresDefined = true; scores.setTopicWordScore(topic, position, score); topicScore += score; scores.setTopicScore(topic, topicScore / numTopWords);
public TopicScores getTokenDocumentDiscrepancies() { TopicScores scores = new TopicScores("token-doc-diff", numTopics, numTopWords); scores.wordScoresDefined = true; scores.setTopicWordScore(topic, position, score); topicScore += score; scores.setTopicScore(topic, topicScore);
TopicScores scores = new TopicScores("word-length-sd", numTopics, numTopWords); scores.wordScoresDefined = true; scores.addToTopicScore(topic, (length - meanLength) / lengthSD); scores.setTopicWordScore(topic, position, (length - meanLength) / lengthSD);
TopicScores scores = new TopicScores("word-length-sd", numTopics, numTopWords); scores.wordScoresDefined = true; scores.addToTopicScore(topic, (length - meanLength) / lengthSD); scores.setTopicWordScore(topic, position, (length - meanLength) / lengthSD);
TopicScores scores = new TopicScores("exclusivity", numTopics, numTopWords); scores.wordScoresDefined = true; scores.setTopicWordScore(topic, position, score); topicScore += score; scores.setTopicScore(topic, topicScore / numTopWords);
public TopicScores getTokenDocumentDiscrepancies() { TopicScores scores = new TopicScores("token-doc-diff", numTopics, numTopWords); scores.wordScoresDefined = true; scores.setTopicWordScore(topic, position, score); topicScore += score; scores.setTopicScore(topic, topicScore);
/** Low-quality topics may be very similar to the global distribution. */ public TopicScores getDistanceFromCorpus() { int[] tokensPerTopic = model.tokensPerTopic; TopicScores scores = new TopicScores("corpus_dist", numTopics, numTopWords); scores.wordScoresDefined = true; for (int topic = 0; topic < numTopics; topic++) { double coefficient = (double) numTokens / tokensPerTopic[topic]; double topicScore = 0.0; int position = 0; TreeSet<IDSorter> sortedWords = topicSortedWords.get(topic); for (IDSorter info: sortedWords) { int type = info.getID(); double count = info.getWeight(); double score = (count / tokensPerTopic[topic]) * Math.log( coefficient * count / wordTypeCounts[type] ); if (position < numTopWords) { //System.out.println(alphabet.lookupObject(type) + ": " + count + " * " + numTokens + " / " + wordTypeCounts[type] + " * " + tokensPerTopic[topic] + " = " + (coefficient * count / wordTypeCounts[type])); scores.setTopicWordScore(topic, position, score); } topicScore += score; position++; } scores.setTopicScore(topic, topicScore); } return scores; }
/** Low-quality topics may be very similar to the global distribution. */ public TopicScores getDistanceFromCorpus() { int[] tokensPerTopic = model.tokensPerTopic; TopicScores scores = new TopicScores("corpus_dist", numTopics, numTopWords); scores.wordScoresDefined = true; for (int topic = 0; topic < numTopics; topic++) { double coefficient = (double) numTokens / tokensPerTopic[topic]; double topicScore = 0.0; int position = 0; TreeSet<IDSorter> sortedWords = topicSortedWords.get(topic); for (IDSorter info: sortedWords) { int type = info.getID(); double count = info.getWeight(); double score = (count / tokensPerTopic[topic]) * Math.log( coefficient * count / wordTypeCounts[type] ); if (position < numTopWords) { //System.out.println(alphabet.lookupObject(type) + ": " + count + " * " + numTokens + " / " + wordTypeCounts[type] + " * " + tokensPerTopic[topic] + " = " + (coefficient * count / wordTypeCounts[type])); scores.setTopicWordScore(topic, position, score); } topicScore += score; position++; } scores.setTopicScore(topic, topicScore); } return scores; }
/** Low-quality topics may be very similar to the global distribution. */ public TopicScores getDistanceFromCorpus() { int[] tokensPerTopic = model.tokensPerTopic; TopicScores scores = new TopicScores("corpus_dist", numTopics, numTopWords); scores.wordScoresDefined = true; for (int topic = 0; topic < numTopics; topic++) { double coefficient = (double) numTokens / tokensPerTopic[topic]; double topicScore = 0.0; int position = 0; TreeSet<IDSorter> sortedWords = topicSortedWords.get(topic); for (IDSorter info: sortedWords) { int type = info.getID(); double count = info.getWeight(); double score = (count / tokensPerTopic[topic]) * Math.log( coefficient * count / wordTypeCounts[type] ); if (position < numTopWords) { //System.out.println(alphabet.lookupObject(type) + ": " + count + " * " + numTokens + " / " + wordTypeCounts[type] + " * " + tokensPerTopic[topic] + " = " + (coefficient * count / wordTypeCounts[type])); scores.setTopicWordScore(topic, position, score); } topicScore += score; position++; } scores.setTopicScore(topic, topicScore); } return scores; }
/**
 * Divergence of each topic's word distribution from the uniform
 * distribution over the vocabulary: per word, p(w|t) * log( p(w|t) * V ),
 * where V = alphabet.size(). Sharply peaked topics score high; diffuse
 * topics score near zero. Only the top numTopWords words get individual
 * word scores recorded; the topic score sums over all words.
 *
 * Fix: removed the unused local `int type = info.getID();` — unlike the
 * corpus-distance score, the uniform baseline (1/V) is the same for
 * every word type, so the id is never needed.
 */
public TopicScores getDistanceFromUniform() {
    int[] tokensPerTopic = model.tokensPerTopic;
    TopicScores scores = new TopicScores("uniform_dist", numTopics, numTopWords);
    scores.wordScoresDefined = true;
    int numTypes = alphabet.size();

    for (int topic = 0; topic < numTopics; topic++) {
        double topicScore = 0.0;
        int position = 0;
        TreeSet<IDSorter> sortedWords = topicSortedWords.get(topic);

        for (IDSorter info : sortedWords) {
            double count = info.getWeight();
            // p(w|t) * log( p(w|t) / (1/numTypes) ), with
            // p(w|t) = count / tokensPerTopic[topic].
            double score = (count / tokensPerTopic[topic])
                * Math.log( (count * numTypes) / tokensPerTopic[topic] );

            if (position < numTopWords) {
                scores.setTopicWordScore(topic, position, score);
            }
            topicScore += score;
            position++;
        }
        scores.setTopicScore(topic, topicScore);
    }
    return scores;
}
/**
 * Divergence of each topic's word distribution from the uniform
 * distribution over the vocabulary: per word, p(w|t) * log( p(w|t) * V ),
 * where V = alphabet.size(). Sharply peaked topics score high; diffuse
 * topics score near zero. Only the top numTopWords words get individual
 * word scores recorded; the topic score sums over all words.
 *
 * Fix: removed the unused local `int type = info.getID();` — unlike the
 * corpus-distance score, the uniform baseline (1/V) is the same for
 * every word type, so the id is never needed.
 */
public TopicScores getDistanceFromUniform() {
    int[] tokensPerTopic = model.tokensPerTopic;
    TopicScores scores = new TopicScores("uniform_dist", numTopics, numTopWords);
    scores.wordScoresDefined = true;
    int numTypes = alphabet.size();

    for (int topic = 0; topic < numTopics; topic++) {
        double topicScore = 0.0;
        int position = 0;
        TreeSet<IDSorter> sortedWords = topicSortedWords.get(topic);

        for (IDSorter info : sortedWords) {
            double count = info.getWeight();
            // p(w|t) * log( p(w|t) / (1/numTypes) ), with
            // p(w|t) = count / tokensPerTopic[topic].
            double score = (count / tokensPerTopic[topic])
                * Math.log( (count * numTypes) / tokensPerTopic[topic] );

            if (position < numTopWords) {
                scores.setTopicWordScore(topic, position, score);
            }
            topicScore += score;
            position++;
        }
        scores.setTopicScore(topic, topicScore);
    }
    return scores;
}
/**
 * Divergence of each topic's word distribution from the uniform
 * distribution over the vocabulary: per word, p(w|t) * log( p(w|t) * V ),
 * where V = alphabet.size(). Sharply peaked topics score high; diffuse
 * topics score near zero. Only the top numTopWords words get individual
 * word scores recorded; the topic score sums over all words.
 *
 * Fix: removed the unused local `int type = info.getID();` — unlike the
 * corpus-distance score, the uniform baseline (1/V) is the same for
 * every word type, so the id is never needed.
 */
public TopicScores getDistanceFromUniform() {
    int[] tokensPerTopic = model.tokensPerTopic;
    TopicScores scores = new TopicScores("uniform_dist", numTopics, numTopWords);
    scores.wordScoresDefined = true;
    int numTypes = alphabet.size();

    for (int topic = 0; topic < numTopics; topic++) {
        double topicScore = 0.0;
        int position = 0;
        TreeSet<IDSorter> sortedWords = topicSortedWords.get(topic);

        for (IDSorter info : sortedWords) {
            double count = info.getWeight();
            // p(w|t) * log( p(w|t) / (1/numTypes) ), with
            // p(w|t) = count / tokensPerTopic[topic].
            double score = (count / tokensPerTopic[topic])
                * Math.log( (count * numTypes) / tokensPerTopic[topic] );

            if (position < numTopWords) {
                scores.setTopicWordScore(topic, position, score);
            }
            topicScore += score;
            position++;
        }
        scores.setTopicScore(topic, topicScore);
    }
    return scores;
}
/**
 * Topic coherence from pairwise co-document counts. For each ordered
 * pair of top words (i, j) with j ranked above i, the pair score is
 * log( (codoc[i][j] + beta) / (codoc[j][j] + beta) ) — presumably the
 * smoothed fraction of documents containing the higher-ranked word that
 * also contain the lower-ranked one (TODO confirm matrix semantics
 * against where topicCodocumentMatrices is filled). A word's recorded
 * score is the worst (minimum) pair score it participates in; the topic
 * score is the sum over all pairs.
 */
public TopicScores getCoherence() {
    TopicScores scores = new TopicScores("coherence", numTopics, numTopWords);
    scores.wordScoresDefined = true;

    for (int topic = 0; topic < numTopics; topic++) {
        int[][] codoc = topicCodocumentMatrices[topic];
        double total = 0.0;

        for (int i = 0; i < numTopWords; i++) {
            double rowTotal = 0.0;
            // 0.0 is the no-pairs default (row 0 has no higher-ranked
            // partner), and also caps recorded scores at zero since all
            // pair scores are log ratios <= the tracked minimum.
            double worstPair = 0.0;

            for (int j = 0; j < i; j++) {
                double pairScore =
                    Math.log( (codoc[i][j] + model.beta) / (codoc[j][j] + model.beta) );
                rowTotal += pairScore;
                if (pairScore < worstPair) {
                    worstPair = pairScore;
                }
            }
            // Accumulate per-row, then fold into the topic total, keeping
            // the original summation order for bit-identical results.
            total += rowTotal;
            scores.setTopicWordScore(topic, i, worstPair);
        }
        scores.setTopicScore(topic, total);
    }
    return scores;
}
/**
 * Topic coherence from pairwise co-document counts. For each ordered
 * pair of top words (i, j) with j ranked above i, the pair score is
 * log( (codoc[i][j] + beta) / (codoc[j][j] + beta) ) — presumably the
 * smoothed fraction of documents containing the higher-ranked word that
 * also contain the lower-ranked one (TODO confirm matrix semantics
 * against where topicCodocumentMatrices is filled). A word's recorded
 * score is the worst (minimum) pair score it participates in; the topic
 * score is the sum over all pairs.
 */
public TopicScores getCoherence() {
    TopicScores scores = new TopicScores("coherence", numTopics, numTopWords);
    scores.wordScoresDefined = true;

    for (int topic = 0; topic < numTopics; topic++) {
        int[][] codoc = topicCodocumentMatrices[topic];
        double total = 0.0;

        for (int i = 0; i < numTopWords; i++) {
            double rowTotal = 0.0;
            // 0.0 is the no-pairs default (row 0 has no higher-ranked
            // partner), and also caps recorded scores at zero since all
            // pair scores are log ratios <= the tracked minimum.
            double worstPair = 0.0;

            for (int j = 0; j < i; j++) {
                double pairScore =
                    Math.log( (codoc[i][j] + model.beta) / (codoc[j][j] + model.beta) );
                rowTotal += pairScore;
                if (pairScore < worstPair) {
                    worstPair = pairScore;
                }
            }
            // Accumulate per-row, then fold into the topic total, keeping
            // the original summation order for bit-identical results.
            total += rowTotal;
            scores.setTopicWordScore(topic, i, worstPair);
        }
        scores.setTopicScore(topic, total);
    }
    return scores;
}
/**
 * Topic coherence from pairwise co-document counts. For each ordered
 * pair of top words (i, j) with j ranked above i, the pair score is
 * log( (codoc[i][j] + beta) / (codoc[j][j] + beta) ) — presumably the
 * smoothed fraction of documents containing the higher-ranked word that
 * also contain the lower-ranked one (TODO confirm matrix semantics
 * against where topicCodocumentMatrices is filled). A word's recorded
 * score is the worst (minimum) pair score it participates in; the topic
 * score is the sum over all pairs.
 */
public TopicScores getCoherence() {
    TopicScores scores = new TopicScores("coherence", numTopics, numTopWords);
    scores.wordScoresDefined = true;

    for (int topic = 0; topic < numTopics; topic++) {
        int[][] codoc = topicCodocumentMatrices[topic];
        double total = 0.0;

        for (int i = 0; i < numTopWords; i++) {
            double rowTotal = 0.0;
            // 0.0 is the no-pairs default (row 0 has no higher-ranked
            // partner), and also caps recorded scores at zero since all
            // pair scores are log ratios <= the tracked minimum.
            double worstPair = 0.0;

            for (int j = 0; j < i; j++) {
                double pairScore =
                    Math.log( (codoc[i][j] + model.beta) / (codoc[j][j] + model.beta) );
                rowTotal += pairScore;
                if (pairScore < worstPair) {
                    worstPair = pairScore;
                }
            }
            // Accumulate per-row, then fold into the topic total, keeping
            // the original summation order for bit-identical results.
            total += rowTotal;
            scores.setTopicWordScore(topic, i, worstPair);
        }
        scores.setTopicScore(topic, total);
    }
    return scores;
}
/**
 * Effective number of words per topic: the inverse of the sum of squared
 * word probabilities (inverse Simpson index). A topic concentrated on a
 * few words gets a small value; a diffuse topic a large one. No per-word
 * scores are defined for this metric (wordScoresDefined stays false).
 *
 * Fix: removed two unused locals — `int numTypes = alphabet.size();`
 * and `int type = info.getID();` — neither appears in the computation.
 */
public TopicScores getEffectiveNumberOfWords() {
    int[] tokensPerTopic = model.tokensPerTopic;
    TopicScores scores = new TopicScores("eff_num_words", numTopics, numTopWords);

    for (int topic = 0; topic < numTopics; topic++) {
        double sumSquaredProbabilities = 0.0;
        TreeSet<IDSorter> sortedWords = topicSortedWords.get(topic);

        for (IDSorter info : sortedWords) {
            // p(w|t) = count / tokensInTopic; getWeight() is a double,
            // so this is floating-point division.
            double probability = info.getWeight() / tokensPerTopic[topic];
            sumSquaredProbabilities += probability * probability;
        }
        // 1 / sum(p^2): the count of equally likely words that would
        // produce the same concentration.
        scores.setTopicScore(topic, 1.0 / sumSquaredProbabilities);
    }
    return scores;
}
/**
 * Effective number of words per topic: the inverse of the sum of squared
 * word probabilities (inverse Simpson index). A topic concentrated on a
 * few words gets a small value; a diffuse topic a large one. No per-word
 * scores are defined for this metric (wordScoresDefined stays false).
 *
 * Fix: removed two unused locals — `int numTypes = alphabet.size();`
 * and `int type = info.getID();` — neither appears in the computation.
 */
public TopicScores getEffectiveNumberOfWords() {
    int[] tokensPerTopic = model.tokensPerTopic;
    TopicScores scores = new TopicScores("eff_num_words", numTopics, numTopWords);

    for (int topic = 0; topic < numTopics; topic++) {
        double sumSquaredProbabilities = 0.0;
        TreeSet<IDSorter> sortedWords = topicSortedWords.get(topic);

        for (IDSorter info : sortedWords) {
            // p(w|t) = count / tokensInTopic; getWeight() is a double,
            // so this is floating-point division.
            double probability = info.getWeight() / tokensPerTopic[topic];
            sumSquaredProbabilities += probability * probability;
        }
        // 1 / sum(p^2): the count of equally likely words that would
        // produce the same concentration.
        scores.setTopicScore(topic, 1.0 / sumSquaredProbabilities);
    }
    return scores;
}
/**
 * Effective number of words per topic: the inverse of the sum of squared
 * word probabilities (inverse Simpson index). A topic concentrated on a
 * few words gets a small value; a diffuse topic a large one. No per-word
 * scores are defined for this metric (wordScoresDefined stays false).
 *
 * Fix: removed two unused locals — `int numTypes = alphabet.size();`
 * and `int type = info.getID();` — neither appears in the computation.
 */
public TopicScores getEffectiveNumberOfWords() {
    int[] tokensPerTopic = model.tokensPerTopic;
    TopicScores scores = new TopicScores("eff_num_words", numTopics, numTopWords);

    for (int topic = 0; topic < numTopics; topic++) {
        double sumSquaredProbabilities = 0.0;
        TreeSet<IDSorter> sortedWords = topicSortedWords.get(topic);

        for (IDSorter info : sortedWords) {
            // p(w|t) = count / tokensInTopic; getWeight() is a double,
            // so this is floating-point division.
            double probability = info.getWeight() / tokensPerTopic[topic];
            sumSquaredProbabilities += probability * probability;
        }
        // 1 / sum(p^2): the count of equally likely words that would
        // produce the same concentration.
        scores.setTopicScore(topic, 1.0 / sumSquaredProbabilities);
    }
    return scores;
}