cc.mallet.topics.TopicModelDiagnostics$TopicScores.setTopicWordScore java code examples

scores.setTopicWordScore(topic, position, score);
topicScore += score;

scores.setTopicWordScore(topic, position, score);
topicScore += score;

scores.setTopicWordScore(topic, position, (length - meanLength) / lengthSD);

scores.setTopicWordScore(topic, position, score);
topicScore += score;

scores.setTopicWordScore(topic, position, score);
topicScore += score;

scores.setTopicWordScore(topic, position, score);
topicScore += score;

scores.setTopicWordScore(topic, position, (length - meanLength) / lengthSD);

scores.setTopicWordScore(topic, position, (length - meanLength) / lengthSD);

/**
 * Low-quality topics may be very similar to the global distribution.
 *
 * For every topic, each entry of that topic's sorted word set contributes
 * {@code (count / topicTokens) * log((numTokens / topicTokens) * count / wordTypeCounts[type])}.
 * The topic score is the sum over all entries; per-word scores are stored
 * only for the first {@code numTopWords} entries in iteration order.
 *
 * @return a "corpus_dist" score table with topic and top-word scores filled in
 */
public TopicScores getDistanceFromCorpus() {
  int[] topicTotals = model.tokensPerTopic;
  TopicScores result = new TopicScores("corpus_dist", numTopics, numTopWords);
  result.wordScoresDefined = true;

  for (int topic = 0; topic < numTopics; topic++) {
    // Ratio of corpus size to topic size; rescales the count ratio inside the log.
    double corpusToTopicRatio = (double) numTokens / topicTotals[topic];
    double sum = 0.0;
    int rank = 0;

    for (IDSorter entry : topicSortedWords.get(topic)) {
      int typeIndex = entry.getID();
      double weight = entry.getWeight();
      double wordScore = (weight / topicTotals[topic]) *
        Math.log( corpusToTopicRatio * weight / wordTypeCounts[typeIndex] );

      // Only the leading numTopWords entries have a per-word slot.
      if (rank < numTopWords) {
        result.setTopicWordScore(topic, rank, wordScore);
      }

      sum += wordScore;
      rank++;
    }

    result.setTopicScore(topic, sum);
  }

  return result;
}

/**
 * Low-quality topics may be very similar to the global distribution.
 *
 * For every topic, each entry of that topic's sorted word set contributes
 * {@code (count / topicTokens) * log((numTokens / topicTokens) * count / wordTypeCounts[type])}.
 * The topic score is the sum over all entries; per-word scores are stored
 * only for the first {@code numTopWords} entries in iteration order.
 *
 * @return a "corpus_dist" score table with topic and top-word scores filled in
 */
public TopicScores getDistanceFromCorpus() {
  int[] topicTotals = model.tokensPerTopic;
  TopicScores result = new TopicScores("corpus_dist", numTopics, numTopWords);
  result.wordScoresDefined = true;

  for (int topic = 0; topic < numTopics; topic++) {
    // Ratio of corpus size to topic size; rescales the count ratio inside the log.
    double corpusToTopicRatio = (double) numTokens / topicTotals[topic];
    double sum = 0.0;
    int rank = 0;

    for (IDSorter entry : topicSortedWords.get(topic)) {
      int typeIndex = entry.getID();
      double weight = entry.getWeight();
      double wordScore = (weight / topicTotals[topic]) *
        Math.log( corpusToTopicRatio * weight / wordTypeCounts[typeIndex] );

      // Only the leading numTopWords entries have a per-word slot.
      if (rank < numTopWords) {
        result.setTopicWordScore(topic, rank, wordScore);
      }

      sum += wordScore;
      rank++;
    }

    result.setTopicScore(topic, sum);
  }

  return result;
}

/**
 * Scores each topic by comparing its word counts against an even spread
 * over the vocabulary.
 *
 * Each entry of a topic's sorted word set contributes
 * {@code (count / topicTokens) * log(count * numTypes / topicTokens)}, where
 * {@code numTypes} is the alphabet size. The topic score is the sum over all
 * entries; per-word scores are stored only for the first {@code numTopWords}
 * entries in iteration order.
 *
 * @return a "uniform_dist" score table with topic and top-word scores filled in
 */
public TopicScores getDistanceFromUniform() {
  int[] tokensPerTopic = model.tokensPerTopic;
  TopicScores scores = new TopicScores("uniform_dist", numTopics, numTopWords);
  scores.wordScoresDefined = true;
  int numTypes = alphabet.size();
  for (int topic = 0; topic < numTopics; topic++) {
    // Loop-invariant for this topic; read once instead of on every word.
    int topicTokens = tokensPerTopic[topic];
    double topicScore = 0.0;
    int position = 0;
    TreeSet<IDSorter> sortedWords = topicSortedWords.get(topic);
    for (IDSorter info : sortedWords) {
      // The word's type ID is not needed here: the reference distribution
      // is the same for every type, so only the count matters.
      double count = info.getWeight();
      double score = (count / topicTokens) *
        Math.log( (count * numTypes) / topicTokens );
      if (position < numTopWords) {
        scores.setTopicWordScore(topic, position, score);
      }

      topicScore += score;
      position++;
    }
    scores.setTopicScore(topic, topicScore);
  }
  return scores;
}

/**
 * Low-quality topics may be very similar to the global distribution.
 *
 * For every topic, each entry of that topic's sorted word set contributes
 * {@code (count / topicTokens) * log((numTokens / topicTokens) * count / wordTypeCounts[type])}.
 * The topic score is the sum over all entries; per-word scores are stored
 * only for the first {@code numTopWords} entries in iteration order.
 *
 * @return a "corpus_dist" score table with topic and top-word scores filled in
 */
public TopicScores getDistanceFromCorpus() {
  int[] topicTotals = model.tokensPerTopic;
  TopicScores result = new TopicScores("corpus_dist", numTopics, numTopWords);
  result.wordScoresDefined = true;

  for (int topic = 0; topic < numTopics; topic++) {
    // Ratio of corpus size to topic size; rescales the count ratio inside the log.
    double corpusToTopicRatio = (double) numTokens / topicTotals[topic];
    double sum = 0.0;
    int rank = 0;

    for (IDSorter entry : topicSortedWords.get(topic)) {
      int typeIndex = entry.getID();
      double weight = entry.getWeight();
      double wordScore = (weight / topicTotals[topic]) *
        Math.log( corpusToTopicRatio * weight / wordTypeCounts[typeIndex] );

      // Only the leading numTopWords entries have a per-word slot.
      if (rank < numTopWords) {
        result.setTopicWordScore(topic, rank, wordScore);
      }

      sum += wordScore;
      rank++;
    }

    result.setTopicScore(topic, sum);
  }

  return result;
}

/**
 * Scores each topic by comparing its word counts against an even spread
 * over the vocabulary.
 *
 * Each entry of a topic's sorted word set contributes
 * {@code (count / topicTokens) * log(count * numTypes / topicTokens)}, where
 * {@code numTypes} is the alphabet size. The topic score is the sum over all
 * entries; per-word scores are stored only for the first {@code numTopWords}
 * entries in iteration order.
 *
 * @return a "uniform_dist" score table with topic and top-word scores filled in
 */
public TopicScores getDistanceFromUniform() {
  int[] tokensPerTopic = model.tokensPerTopic;
  TopicScores scores = new TopicScores("uniform_dist", numTopics, numTopWords);
  scores.wordScoresDefined = true;
  int numTypes = alphabet.size();
  for (int topic = 0; topic < numTopics; topic++) {
    // Loop-invariant for this topic; read once instead of on every word.
    int topicTokens = tokensPerTopic[topic];
    double topicScore = 0.0;
    int position = 0;
    TreeSet<IDSorter> sortedWords = topicSortedWords.get(topic);
    for (IDSorter info : sortedWords) {
      // The word's type ID is not needed here: the reference distribution
      // is the same for every type, so only the count matters.
      double count = info.getWeight();
      double score = (count / topicTokens) *
        Math.log( (count * numTypes) / topicTokens );
      if (position < numTopWords) {
        scores.setTopicWordScore(topic, position, score);
      }

      topicScore += score;
      position++;
    }
    scores.setTopicScore(topic, topicScore);
  }
  return scores;
}

/**
 * Scores each topic by comparing its word counts against an even spread
 * over the vocabulary.
 *
 * Each entry of a topic's sorted word set contributes
 * {@code (count / topicTokens) * log(count * numTypes / topicTokens)}, where
 * {@code numTypes} is the alphabet size. The topic score is the sum over all
 * entries; per-word scores are stored only for the first {@code numTopWords}
 * entries in iteration order.
 *
 * @return a "uniform_dist" score table with topic and top-word scores filled in
 */
public TopicScores getDistanceFromUniform() {
  int[] tokensPerTopic = model.tokensPerTopic;
  TopicScores scores = new TopicScores("uniform_dist", numTopics, numTopWords);
  scores.wordScoresDefined = true;
  int numTypes = alphabet.size();
  for (int topic = 0; topic < numTopics; topic++) {
    // Loop-invariant for this topic; read once instead of on every word.
    int topicTokens = tokensPerTopic[topic];
    double topicScore = 0.0;
    int position = 0;
    TreeSet<IDSorter> sortedWords = topicSortedWords.get(topic);
    for (IDSorter info : sortedWords) {
      // The word's type ID is not needed here: the reference distribution
      // is the same for every type, so only the count matters.
      double count = info.getWeight();
      double score = (count / topicTokens) *
        Math.log( (count * numTypes) / topicTokens );
      if (position < numTopWords) {
        scores.setTopicWordScore(topic, position, score);
      }

      topicScore += score;
      position++;
    }
    scores.setTopicScore(topic, topicScore);
  }
  return scores;
}

/**
 * Builds coherence scores from each topic's co-document count matrix.
 *
 * For every pair of top-word positions (word, earlier) with earlier &lt; word,
 * the log of {@code (matrix[word][earlier] + beta) / (matrix[earlier][earlier] + beta)}
 * is computed. The topic score is the sum of these logs over all pairs. Each
 * word's own score is the smallest such log in its row, capped above at 0.0
 * (so the first word, which has no earlier words, always gets 0.0).
 *
 * @return a "coherence" score table with topic and top-word scores filled in
 */
public TopicScores getCoherence() {
  TopicScores result = new TopicScores("coherence", numTopics, numTopWords);
  result.wordScoresDefined = true;

  for (int topic = 0; topic < numTopics; topic++) {
    int[][] codocCounts = topicCodocumentMatrices[topic];
    double topicTotal = 0.0;

    for (int word = 0; word < numTopWords; word++) {
      double rowTotal = 0.0;
      double lowest = 0.0;

      // Compare this word against every higher-ranked word (lower triangle only).
      for (int earlier = 0; earlier < word; earlier++) {
        double logRatio = Math.log( (codocCounts[word][earlier] + model.beta) / (codocCounts[earlier][earlier] + model.beta) );
        rowTotal += logRatio;
        if (lowest > logRatio) {
          lowest = logRatio;
        }
      }

      topicTotal += rowTotal;
      result.setTopicWordScore(topic, word, lowest);
    }

    result.setTopicScore(topic, topicTotal);
  }

  return result;
}

/**
 * Builds coherence scores from each topic's co-document count matrix.
 *
 * For every pair of top-word positions (word, earlier) with earlier &lt; word,
 * the log of {@code (matrix[word][earlier] + beta) / (matrix[earlier][earlier] + beta)}
 * is computed. The topic score is the sum of these logs over all pairs. Each
 * word's own score is the smallest such log in its row, capped above at 0.0
 * (so the first word, which has no earlier words, always gets 0.0).
 *
 * @return a "coherence" score table with topic and top-word scores filled in
 */
public TopicScores getCoherence() {
  TopicScores result = new TopicScores("coherence", numTopics, numTopWords);
  result.wordScoresDefined = true;

  for (int topic = 0; topic < numTopics; topic++) {
    int[][] codocCounts = topicCodocumentMatrices[topic];
    double topicTotal = 0.0;

    for (int word = 0; word < numTopWords; word++) {
      double rowTotal = 0.0;
      double lowest = 0.0;

      // Compare this word against every higher-ranked word (lower triangle only).
      for (int earlier = 0; earlier < word; earlier++) {
        double logRatio = Math.log( (codocCounts[word][earlier] + model.beta) / (codocCounts[earlier][earlier] + model.beta) );
        rowTotal += logRatio;
        if (lowest > logRatio) {
          lowest = logRatio;
        }
      }

      topicTotal += rowTotal;
      result.setTopicWordScore(topic, word, lowest);
    }

    result.setTopicScore(topic, topicTotal);
  }

  return result;
}

/**
 * Builds coherence scores from each topic's co-document count matrix.
 *
 * For every pair of top-word positions (word, earlier) with earlier &lt; word,
 * the log of {@code (matrix[word][earlier] + beta) / (matrix[earlier][earlier] + beta)}
 * is computed. The topic score is the sum of these logs over all pairs. Each
 * word's own score is the smallest such log in its row, capped above at 0.0
 * (so the first word, which has no earlier words, always gets 0.0).
 *
 * @return a "coherence" score table with topic and top-word scores filled in
 */
public TopicScores getCoherence() {
  TopicScores result = new TopicScores("coherence", numTopics, numTopWords);
  result.wordScoresDefined = true;

  for (int topic = 0; topic < numTopics; topic++) {
    int[][] codocCounts = topicCodocumentMatrices[topic];
    double topicTotal = 0.0;

    for (int word = 0; word < numTopWords; word++) {
      double rowTotal = 0.0;
      double lowest = 0.0;

      // Compare this word against every higher-ranked word (lower triangle only).
      for (int earlier = 0; earlier < word; earlier++) {
        double logRatio = Math.log( (codocCounts[word][earlier] + model.beta) / (codocCounts[earlier][earlier] + model.beta) );
        rowTotal += logRatio;
        if (lowest > logRatio) {
          lowest = logRatio;
        }
      }

      topicTotal += rowTotal;
      result.setTopicWordScore(topic, word, lowest);
    }

    result.setTopicScore(topic, topicTotal);
  }

  return result;
}

/**
 * Low-quality topics often have lots of unusually short words.
 *
 * Records the character length of each top word as its word score and the
 * mean length of the topic's top words as the topic score. Scanning stops at
 * the first {@code null} slot in a topic's top-word array.
 *
 * The mean is taken over the words actually present rather than over the
 * array's capacity, so a topic with fewer words than slots is not penalized
 * with an artificially low average. A topic with no words scores 0.0.
 *
 * @return a "word-length" score table with topic and top-word scores filled in
 */
public TopicScores getWordLengthScores() {
  TopicScores scores = new TopicScores("word-length", numTopics, numTopWords);
  scores.wordScoresDefined = true;
  for (int topic = 0; topic < numTopics; topic++) {
    int total = 0;
    int numWords = 0;
    for (int position = 0; position < topicTopWords[topic].length; position++) {
      // A null slot marks the end of this topic's populated top words.
      if (topicTopWords[topic][position] == null) { break; }

      int length = topicTopWords[topic][position].length();
      total += length;
      numWords++;
      scores.setTopicWordScore(topic, position, length);
    }
    // Divide by the number of words seen, guarding the empty-topic case.
    double meanLength = (numWords == 0) ? 0.0 : (double) total / numWords;
    scores.setTopicScore(topic, meanLength);
  }
  return scores;
}

/**
 * Low-quality topics often have lots of unusually short words.
 *
 * Records the character length of each top word as its word score and the
 * mean length of the topic's top words as the topic score. Scanning stops at
 * the first {@code null} slot in a topic's top-word array.
 *
 * The mean is taken over the words actually present rather than over the
 * array's capacity, so a topic with fewer words than slots is not penalized
 * with an artificially low average. A topic with no words scores 0.0.
 *
 * @return a "word-length" score table with topic and top-word scores filled in
 */
public TopicScores getWordLengthScores() {
  TopicScores scores = new TopicScores("word-length", numTopics, numTopWords);
  scores.wordScoresDefined = true;
  for (int topic = 0; topic < numTopics; topic++) {
    int total = 0;
    int numWords = 0;
    for (int position = 0; position < topicTopWords[topic].length; position++) {
      // A null slot marks the end of this topic's populated top words.
      if (topicTopWords[topic][position] == null) { break; }

      int length = topicTopWords[topic][position].length();
      total += length;
      numWords++;
      scores.setTopicWordScore(topic, position, length);
    }
    // Divide by the number of words seen, guarding the empty-topic case.
    double meanLength = (numWords == 0) ? 0.0 : (double) total / numWords;
    scores.setTopicScore(topic, meanLength);
  }
  return scores;
}

/**
 * Low-quality topics often have lots of unusually short words.
 *
 * Records the character length of each top word as its word score and the
 * mean length of the topic's top words as the topic score. Scanning stops at
 * the first {@code null} slot in a topic's top-word array.
 *
 * The mean is taken over the words actually present rather than over the
 * array's capacity, so a topic with fewer words than slots is not penalized
 * with an artificially low average. A topic with no words scores 0.0.
 *
 * @return a "word-length" score table with topic and top-word scores filled in
 */
public TopicScores getWordLengthScores() {
  TopicScores scores = new TopicScores("word-length", numTopics, numTopWords);
  scores.wordScoresDefined = true;
  for (int topic = 0; topic < numTopics; topic++) {
    int total = 0;
    int numWords = 0;
    for (int position = 0; position < topicTopWords[topic].length; position++) {
      // A null slot marks the end of this topic's populated top words.
      if (topicTopWords[topic][position] == null) { break; }

      int length = topicTopWords[topic][position].length();
      total += length;
      numWords++;
      scores.setTopicWordScore(topic, position, length);
    }
    // Divide by the number of words seen, guarding the empty-topic case.
    double meanLength = (numWords == 0) ? 0.0 : (double) total / numWords;
    scores.setTopicScore(topic, meanLength);
  }
  return scores;
}

Popular in Java

Reading from database using SQL prepared statement
getResourceAsStream (ClassLoader)
getSystemService (Context)
onCreateOptionsMenu (Activity)
Time (java.sql)
Java representation of an SQL TIME value. Provides utilities to format and parse the time's represen
Comparator (java.util)
A Comparator is used to compare two objects to determine their ordering with respect to each other.
Enumeration (java.util)
A legacy iteration interface. New code should use Iterator instead. Iterator replaces the enumeration
Servlet (javax.servlet)
Defines methods that all servlets must implement. A servlet is a small Java program that runs within
Options (org.apache.commons.cli)
Main entry-point into the library. Options represents a collection of Option objects, which describ
GridLayout (java.awt)
The GridLayout class is a layout manager that lays out a container's components in a rectangular gri
Top Sublime Text plugins

How to use the setTopicWordScore method in cc.mallet.topics.TopicModelDiagnostics$TopicScores

Best Java code snippets using cc.mallet.topics.TopicModelDiagnostics$TopicScores.setTopicWordScore (Showing top 20 results out of 315)

How to use
setTopicWordScore
method
in
cc.mallet.topics.TopicModelDiagnostics$TopicScores