// NOTE(review): the eight lines below are garbled, truncated fragments —
// unbalanced braces, loop bodies with no enclosing loop — apparently
// extraction residue from three diagnostics: getTokenDocumentDiscrepancies()
// ("token-doc-diff"), a word-length scorer ("word-length-sd"), and an
// exclusivity scorer ("exclusivity"). Each fragment also appears more than
// once. Recover the complete method bodies from the original source before
// compiling; do not edit these fragments in place. TODO confirm against
// upstream MALLET TopicModelDiagnostics.
public TopicScores getTokenDocumentDiscrepancies() { TopicScores scores = new TopicScores("token-doc-diff", numTopics, numTopWords); scores.wordScoresDefined = true; scores.setTopicWordScore(topic, position, score); topicScore += score; scores.setTopicScore(topic, topicScore);
TopicScores scores = new TopicScores("word-length-sd", numTopics, numTopWords); scores.wordScoresDefined = true; scores.addToTopicScore(topic, (length - meanLength) / lengthSD); scores.setTopicWordScore(topic, position, (length - meanLength) / lengthSD);
TopicScores scores = new TopicScores("exclusivity", numTopics, numTopWords); scores.wordScoresDefined = true; scores.setTopicWordScore(topic, position, score); topicScore += score; scores.setTopicScore(topic, topicScore / numTopWords);
public TopicScores getTokenDocumentDiscrepancies() { TopicScores scores = new TopicScores("token-doc-diff", numTopics, numTopWords); scores.wordScoresDefined = true; scores.setTopicWordScore(topic, position, score); topicScore += score; scores.setTopicScore(topic, topicScore);
TopicScores scores = new TopicScores("word-length-sd", numTopics, numTopWords); scores.wordScoresDefined = true; scores.addToTopicScore(topic, (length - meanLength) / lengthSD); scores.setTopicWordScore(topic, position, (length - meanLength) / lengthSD);
TopicScores scores = new TopicScores("word-length-sd", numTopics, numTopWords); scores.wordScoresDefined = true; scores.addToTopicScore(topic, (length - meanLength) / lengthSD); scores.setTopicWordScore(topic, position, (length - meanLength) / lengthSD);
TopicScores scores = new TopicScores("exclusivity", numTopics, numTopWords); scores.wordScoresDefined = true; scores.setTopicWordScore(topic, position, score); topicScore += score; scores.setTopicScore(topic, topicScore / numTopWords);
public TopicScores getTokenDocumentDiscrepancies() { TopicScores scores = new TopicScores("token-doc-diff", numTopics, numTopWords); scores.wordScoresDefined = true; scores.setTopicWordScore(topic, position, score); topicScore += score; scores.setTopicScore(topic, topicScore);
/** Low-quality topics may be very similar to the global distribution. */ public TopicScores getDistanceFromCorpus() { int[] tokensPerTopic = model.tokensPerTopic; TopicScores scores = new TopicScores("corpus_dist", numTopics, numTopWords); scores.wordScoresDefined = true; for (int topic = 0; topic < numTopics; topic++) { double coefficient = (double) numTokens / tokensPerTopic[topic]; double topicScore = 0.0; int position = 0; TreeSet<IDSorter> sortedWords = topicSortedWords.get(topic); for (IDSorter info: sortedWords) { int type = info.getID(); double count = info.getWeight(); double score = (count / tokensPerTopic[topic]) * Math.log( coefficient * count / wordTypeCounts[type] ); if (position < numTopWords) { //System.out.println(alphabet.lookupObject(type) + ": " + count + " * " + numTokens + " / " + wordTypeCounts[type] + " * " + tokensPerTopic[topic] + " = " + (coefficient * count / wordTypeCounts[type])); scores.setTopicWordScore(topic, position, score); } topicScore += score; position++; } scores.setTopicScore(topic, topicScore); } return scores; }
/** Low-quality topics may be very similar to the global distribution. */ public TopicScores getDistanceFromCorpus() { int[] tokensPerTopic = model.tokensPerTopic; TopicScores scores = new TopicScores("corpus_dist", numTopics, numTopWords); scores.wordScoresDefined = true; for (int topic = 0; topic < numTopics; topic++) { double coefficient = (double) numTokens / tokensPerTopic[topic]; double topicScore = 0.0; int position = 0; TreeSet<IDSorter> sortedWords = topicSortedWords.get(topic); for (IDSorter info: sortedWords) { int type = info.getID(); double count = info.getWeight(); double score = (count / tokensPerTopic[topic]) * Math.log( coefficient * count / wordTypeCounts[type] ); if (position < numTopWords) { //System.out.println(alphabet.lookupObject(type) + ": " + count + " * " + numTokens + " / " + wordTypeCounts[type] + " * " + tokensPerTopic[topic] + " = " + (coefficient * count / wordTypeCounts[type])); scores.setTopicWordScore(topic, position, score); } topicScore += score; position++; } scores.setTopicScore(topic, topicScore); } return scores; }
/** Low-quality topics may be very similar to the global distribution. */ public TopicScores getDistanceFromCorpus() { int[] tokensPerTopic = model.tokensPerTopic; TopicScores scores = new TopicScores("corpus_dist", numTopics, numTopWords); scores.wordScoresDefined = true; for (int topic = 0; topic < numTopics; topic++) { double coefficient = (double) numTokens / tokensPerTopic[topic]; double topicScore = 0.0; int position = 0; TreeSet<IDSorter> sortedWords = topicSortedWords.get(topic); for (IDSorter info: sortedWords) { int type = info.getID(); double count = info.getWeight(); double score = (count / tokensPerTopic[topic]) * Math.log( coefficient * count / wordTypeCounts[type] ); if (position < numTopWords) { //System.out.println(alphabet.lookupObject(type) + ": " + count + " * " + numTokens + " / " + wordTypeCounts[type] + " * " + tokensPerTopic[topic] + " = " + (coefficient * count / wordTypeCounts[type])); scores.setTopicWordScore(topic, position, score); } topicScore += score; position++; } scores.setTopicScore(topic, topicScore); } return scores; }
/**
 * Divergence of each topic's word distribution from the uniform
 * distribution over the vocabulary: per word, p(w|t) * log( p(w|t) * V ),
 * where V = alphabet.size(). Sharply peaked topics score high; diffuse
 * topics score near zero. Only the top numTopWords words get individual
 * word scores recorded; the topic score sums over all words.
 *
 * Fix: removed the unused local `int type = info.getID();` — unlike the
 * corpus-distance score, the uniform baseline (1/V) is the same for
 * every word type, so the id is never needed.
 */
public TopicScores getDistanceFromUniform() {
    int[] tokensPerTopic = model.tokensPerTopic;
    TopicScores scores = new TopicScores("uniform_dist", numTopics, numTopWords);
    scores.wordScoresDefined = true;
    int numTypes = alphabet.size();

    for (int topic = 0; topic < numTopics; topic++) {
        double topicScore = 0.0;
        int position = 0;
        TreeSet<IDSorter> sortedWords = topicSortedWords.get(topic);

        for (IDSorter info : sortedWords) {
            double count = info.getWeight();
            // p(w|t) * log( p(w|t) / (1/numTypes) ), with
            // p(w|t) = count / tokensPerTopic[topic].
            double score = (count / tokensPerTopic[topic])
                * Math.log( (count * numTypes) / tokensPerTopic[topic] );

            if (position < numTopWords) {
                scores.setTopicWordScore(topic, position, score);
            }
            topicScore += score;
            position++;
        }
        scores.setTopicScore(topic, topicScore);
    }
    return scores;
}
/**
 * Divergence of each topic's word distribution from the uniform
 * distribution over the vocabulary: per word, p(w|t) * log( p(w|t) * V ),
 * where V = alphabet.size(). Sharply peaked topics score high; diffuse
 * topics score near zero. Only the top numTopWords words get individual
 * word scores recorded; the topic score sums over all words.
 *
 * Fix: removed the unused local `int type = info.getID();` — unlike the
 * corpus-distance score, the uniform baseline (1/V) is the same for
 * every word type, so the id is never needed.
 */
public TopicScores getDistanceFromUniform() {
    int[] tokensPerTopic = model.tokensPerTopic;
    TopicScores scores = new TopicScores("uniform_dist", numTopics, numTopWords);
    scores.wordScoresDefined = true;
    int numTypes = alphabet.size();

    for (int topic = 0; topic < numTopics; topic++) {
        double topicScore = 0.0;
        int position = 0;
        TreeSet<IDSorter> sortedWords = topicSortedWords.get(topic);

        for (IDSorter info : sortedWords) {
            double count = info.getWeight();
            // p(w|t) * log( p(w|t) / (1/numTypes) ), with
            // p(w|t) = count / tokensPerTopic[topic].
            double score = (count / tokensPerTopic[topic])
                * Math.log( (count * numTypes) / tokensPerTopic[topic] );

            if (position < numTopWords) {
                scores.setTopicWordScore(topic, position, score);
            }
            topicScore += score;
            position++;
        }
        scores.setTopicScore(topic, topicScore);
    }
    return scores;
}
/**
 * Divergence of each topic's word distribution from the uniform
 * distribution over the vocabulary: per word, p(w|t) * log( p(w|t) * V ),
 * where V = alphabet.size(). Sharply peaked topics score high; diffuse
 * topics score near zero. Only the top numTopWords words get individual
 * word scores recorded; the topic score sums over all words.
 *
 * Fix: removed the unused local `int type = info.getID();` — unlike the
 * corpus-distance score, the uniform baseline (1/V) is the same for
 * every word type, so the id is never needed.
 */
public TopicScores getDistanceFromUniform() {
    int[] tokensPerTopic = model.tokensPerTopic;
    TopicScores scores = new TopicScores("uniform_dist", numTopics, numTopWords);
    scores.wordScoresDefined = true;
    int numTypes = alphabet.size();

    for (int topic = 0; topic < numTopics; topic++) {
        double topicScore = 0.0;
        int position = 0;
        TreeSet<IDSorter> sortedWords = topicSortedWords.get(topic);

        for (IDSorter info : sortedWords) {
            double count = info.getWeight();
            // p(w|t) * log( p(w|t) / (1/numTypes) ), with
            // p(w|t) = count / tokensPerTopic[topic].
            double score = (count / tokensPerTopic[topic])
                * Math.log( (count * numTypes) / tokensPerTopic[topic] );

            if (position < numTopWords) {
                scores.setTopicWordScore(topic, position, score);
            }
            topicScore += score;
            position++;
        }
        scores.setTopicScore(topic, topicScore);
    }
    return scores;
}
/**
 * Topic coherence from pairwise co-document counts. For each ordered
 * pair of top words (i, j) with j ranked above i, the pair score is
 * log( (codoc[i][j] + beta) / (codoc[j][j] + beta) ) — presumably the
 * smoothed fraction of documents containing the higher-ranked word that
 * also contain the lower-ranked one (TODO confirm matrix semantics
 * against where topicCodocumentMatrices is filled). A word's recorded
 * score is the worst (minimum) pair score it participates in; the topic
 * score is the sum over all pairs.
 */
public TopicScores getCoherence() {
    TopicScores scores = new TopicScores("coherence", numTopics, numTopWords);
    scores.wordScoresDefined = true;

    for (int topic = 0; topic < numTopics; topic++) {
        int[][] codoc = topicCodocumentMatrices[topic];
        double total = 0.0;

        for (int i = 0; i < numTopWords; i++) {
            double rowTotal = 0.0;
            // 0.0 is the no-pairs default (row 0 has no higher-ranked
            // partner), and also caps recorded scores at zero since all
            // pair scores are log ratios <= the tracked minimum.
            double worstPair = 0.0;

            for (int j = 0; j < i; j++) {
                double pairScore =
                    Math.log( (codoc[i][j] + model.beta) / (codoc[j][j] + model.beta) );
                rowTotal += pairScore;
                if (pairScore < worstPair) {
                    worstPair = pairScore;
                }
            }
            // Accumulate per-row, then fold into the topic total, keeping
            // the original summation order for bit-identical results.
            total += rowTotal;
            scores.setTopicWordScore(topic, i, worstPair);
        }
        scores.setTopicScore(topic, total);
    }
    return scores;
}
/**
 * Topic coherence from pairwise co-document counts. For each ordered
 * pair of top words (i, j) with j ranked above i, the pair score is
 * log( (codoc[i][j] + beta) / (codoc[j][j] + beta) ) — presumably the
 * smoothed fraction of documents containing the higher-ranked word that
 * also contain the lower-ranked one (TODO confirm matrix semantics
 * against where topicCodocumentMatrices is filled). A word's recorded
 * score is the worst (minimum) pair score it participates in; the topic
 * score is the sum over all pairs.
 */
public TopicScores getCoherence() {
    TopicScores scores = new TopicScores("coherence", numTopics, numTopWords);
    scores.wordScoresDefined = true;

    for (int topic = 0; topic < numTopics; topic++) {
        int[][] codoc = topicCodocumentMatrices[topic];
        double total = 0.0;

        for (int i = 0; i < numTopWords; i++) {
            double rowTotal = 0.0;
            // 0.0 is the no-pairs default (row 0 has no higher-ranked
            // partner), and also caps recorded scores at zero since all
            // pair scores are log ratios <= the tracked minimum.
            double worstPair = 0.0;

            for (int j = 0; j < i; j++) {
                double pairScore =
                    Math.log( (codoc[i][j] + model.beta) / (codoc[j][j] + model.beta) );
                rowTotal += pairScore;
                if (pairScore < worstPair) {
                    worstPair = pairScore;
                }
            }
            // Accumulate per-row, then fold into the topic total, keeping
            // the original summation order for bit-identical results.
            total += rowTotal;
            scores.setTopicWordScore(topic, i, worstPair);
        }
        scores.setTopicScore(topic, total);
    }
    return scores;
}
/**
 * Topic coherence from pairwise co-document counts. For each ordered
 * pair of top words (i, j) with j ranked above i, the pair score is
 * log( (codoc[i][j] + beta) / (codoc[j][j] + beta) ) — presumably the
 * smoothed fraction of documents containing the higher-ranked word that
 * also contain the lower-ranked one (TODO confirm matrix semantics
 * against where topicCodocumentMatrices is filled). A word's recorded
 * score is the worst (minimum) pair score it participates in; the topic
 * score is the sum over all pairs.
 */
public TopicScores getCoherence() {
    TopicScores scores = new TopicScores("coherence", numTopics, numTopWords);
    scores.wordScoresDefined = true;

    for (int topic = 0; topic < numTopics; topic++) {
        int[][] codoc = topicCodocumentMatrices[topic];
        double total = 0.0;

        for (int i = 0; i < numTopWords; i++) {
            double rowTotal = 0.0;
            // 0.0 is the no-pairs default (row 0 has no higher-ranked
            // partner), and also caps recorded scores at zero since all
            // pair scores are log ratios <= the tracked minimum.
            double worstPair = 0.0;

            for (int j = 0; j < i; j++) {
                double pairScore =
                    Math.log( (codoc[i][j] + model.beta) / (codoc[j][j] + model.beta) );
                rowTotal += pairScore;
                if (pairScore < worstPair) {
                    worstPair = pairScore;
                }
            }
            // Accumulate per-row, then fold into the topic total, keeping
            // the original summation order for bit-identical results.
            total += rowTotal;
            scores.setTopicWordScore(topic, i, worstPair);
        }
        scores.setTopicScore(topic, total);
    }
    return scores;
}
/**
 * Effective number of words per topic: the inverse of the sum of squared
 * word probabilities (inverse Simpson index). A topic concentrated on a
 * few words gets a small value; a diffuse topic a large one. No per-word
 * scores are defined for this metric (wordScoresDefined stays false).
 *
 * Fix: removed two unused locals — `int numTypes = alphabet.size();`
 * and `int type = info.getID();` — neither appears in the computation.
 */
public TopicScores getEffectiveNumberOfWords() {
    int[] tokensPerTopic = model.tokensPerTopic;
    TopicScores scores = new TopicScores("eff_num_words", numTopics, numTopWords);

    for (int topic = 0; topic < numTopics; topic++) {
        double sumSquaredProbabilities = 0.0;
        TreeSet<IDSorter> sortedWords = topicSortedWords.get(topic);

        for (IDSorter info : sortedWords) {
            // p(w|t) = count / tokensInTopic; getWeight() is a double,
            // so this is floating-point division.
            double probability = info.getWeight() / tokensPerTopic[topic];
            sumSquaredProbabilities += probability * probability;
        }
        // 1 / sum(p^2): the count of equally likely words that would
        // produce the same concentration.
        scores.setTopicScore(topic, 1.0 / sumSquaredProbabilities);
    }
    return scores;
}
/**
 * Effective number of words per topic: the inverse of the sum of squared
 * word probabilities (inverse Simpson index). A topic concentrated on a
 * few words gets a small value; a diffuse topic a large one. No per-word
 * scores are defined for this metric (wordScoresDefined stays false).
 *
 * Fix: removed two unused locals — `int numTypes = alphabet.size();`
 * and `int type = info.getID();` — neither appears in the computation.
 */
public TopicScores getEffectiveNumberOfWords() {
    int[] tokensPerTopic = model.tokensPerTopic;
    TopicScores scores = new TopicScores("eff_num_words", numTopics, numTopWords);

    for (int topic = 0; topic < numTopics; topic++) {
        double sumSquaredProbabilities = 0.0;
        TreeSet<IDSorter> sortedWords = topicSortedWords.get(topic);

        for (IDSorter info : sortedWords) {
            // p(w|t) = count / tokensInTopic; getWeight() is a double,
            // so this is floating-point division.
            double probability = info.getWeight() / tokensPerTopic[topic];
            sumSquaredProbabilities += probability * probability;
        }
        // 1 / sum(p^2): the count of equally likely words that would
        // produce the same concentration.
        scores.setTopicScore(topic, 1.0 / sumSquaredProbabilities);
    }
    return scores;
}
/**
 * Effective number of words per topic: the inverse of the sum of squared
 * word probabilities (inverse Simpson index). A topic concentrated on a
 * few words gets a small value; a diffuse topic a large one. No per-word
 * scores are defined for this metric (wordScoresDefined stays false).
 *
 * Fix: removed two unused locals — `int numTypes = alphabet.size();`
 * and `int type = info.getID();` — neither appears in the computation.
 */
public TopicScores getEffectiveNumberOfWords() {
    int[] tokensPerTopic = model.tokensPerTopic;
    TopicScores scores = new TopicScores("eff_num_words", numTopics, numTopWords);

    for (int topic = 0; topic < numTopics; topic++) {
        double sumSquaredProbabilities = 0.0;
        TreeSet<IDSorter> sortedWords = topicSortedWords.get(topic);

        for (IDSorter info : sortedWords) {
            // p(w|t) = count / tokensInTopic; getWeight() is a double,
            // so this is floating-point division.
            double probability = info.getWeight() / tokensPerTopic[topic];
            sumSquaredProbabilities += probability * probability;
        }
        // 1 / sum(p^2): the count of equally likely words that would
        // produce the same concentration.
        scores.setTopicScore(topic, 1.0 / sumSquaredProbabilities);
    }
    return scores;
}