cc.mallet.types.IDSorter java code examples

for (int topic = 0; topic < numTopics; topic++) {
  sortedTopics[topic] = new IDSorter(topic, topic);
    sortedTopics[topic].set(topic, (float) topicCounts[topic] / totalLength);
    if (sortedTopics[i].getWeight() < threshold) { break; }
    pw.print (sortedTopics[i].getID() + " " + 
         sortedTopics[i].getWeight() + " ");

// For every topic, print one line holding the topic index, that topic's
// probability for document docID, and up to five of its leading words with
// their weights; a blank separator is printed once all topics are done.
for (int topic = 0; topic < numTopics; topic++) {
  Iterator<IDSorter> wordIterator = topicSortedWords.get(topic).iterator();

  // A fresh US-locale formatter per topic keeps the decimal format fixed.
  out = new Formatter(new StringBuilder(), Locale.US);
  out.format("%d\t%.3f\t", topic, model.getTopicProbabilities(docID)[topic]);

  // Stop after five words or when the topic has no more words to offer.
  for (int shown = 0; shown < 5 && wordIterator.hasNext(); shown++) {
    IDSorter entry = wordIterator.next();
    Object word = dataAlphabet.lookupObject(entry.getID());
    out.format("%s (%.3f) ", word, entry.getWeight());
  }

  System.out.println(out);
}
System.out.println("\n");

/**
 * Builds a plain-text report with one line per topic.
 *
 * Each line holds the topic index, a tab, {@code tokensPerTopic[topic]}, a
 * tab, and then the leading words of that topic separated by spaces. Words
 * are ordered by sorting {@link IDSorter} entries built from
 * {@code typeTopicCounts[type][topic]} (presumably highest count first;
 * confirm against {@code IDSorter.compareTo}).
 *
 * @param numWords the maximum number of words to list per topic; if it
 *                 exceeds {@code numTypes}, only {@code numTypes} words are listed
 * @return the report, with each topic's line terminated by a newline
 */
public String topWords (int numWords) {
  StringBuilder output = new StringBuilder();
  IDSorter[] sortedWords = new IDSorter[numTypes];

  // sortedWords has exactly numTypes entries; never index past its end.
  int limit = Math.min(numWords, numTypes);

  for (int topic = 0; topic < numTopics; topic++) {
    // The array is refilled and re-sorted for every topic.
    for (int type = 0; type < numTypes; type++) {
      sortedWords[type] = new IDSorter(type, typeTopicCounts[type][topic]);
    }
    Arrays.sort(sortedWords);

    output.append(topic).append("\t").append(tokensPerTopic[topic]).append("\t");
    for (int i = 0; i < limit; i++) {
      output.append(alphabet.lookupObject(sortedWords[i].getID())).append(" ");
    }
    output.append("\n");
  }
  return output.toString();
}

/**
 * Prints the words whose weight vectors are most similar to a target vector.
 *
 * Similarity is the cosine between {@code targetVector} and each word's row
 * of {@code weights} (row {@code word} starts at index {@code word * stride}
 * and spans {@code numColumns} entries). The squared length of the target is
 * printed first, followed by up to ten lines of
 * {@code similarity <tab> word id <tab> vocabulary entry}.
 *
 * NOTE(review): an all-zero target or word vector makes its normalizer
 * infinite and the resulting similarity NaN; that case is not handled here.
 *
 * @param targetVector the query vector; its first {@code numColumns} entries are read
 */
public void findClosest(double[] targetVector) {
  IDSorter[] sortedWords = new IDSorter[numWords];

  double targetSquaredSum = 0.0;
  for (int col = 0; col < numColumns; col++) {
    targetSquaredSum += targetVector[col] * targetVector[col];
  }
  double targetNormalizer = 1.0 / Math.sqrt(targetSquaredSum);
  System.out.println(targetSquaredSum);

  for (int word = 0; word < numWords; word++) {
    int rowStart = word * stride;

    // First pass: squared length of this word's vector.
    double wordSquaredSum = 0.0;
    for (int col = 0; col < numColumns; col++) {
      wordSquaredSum += weights[rowStart + col] * weights[rowStart + col];
    }
    double wordNormalizer = 1.0 / Math.sqrt(wordSquaredSum);

    // Second pass: inner product of the two length-normalized vectors.
    double innerProduct = 0.0;
    for (int col = 0; col < numColumns; col++) {
      innerProduct += targetNormalizer * targetVector[col] * wordNormalizer * weights[rowStart + col];
    }
    sortedWords[word] = new IDSorter(word, innerProduct);
  }
  Arrays.sort(sortedWords);

  // Report at most ten entries; the vocabulary may hold fewer than ten words.
  int limit = Math.min(10, numWords);
  for (int i = 0; i < limit; i++) {
    System.out.format("%f\t%d\t%s\n", sortedWords[i].getWeight(), sortedWords[i].getID(), vocabulary.lookupObject(sortedWords[i].getID()));
  }
}

/**
 * Writes an XML listing of every topic and its leading words.
 *
 * The document has a {@code <topicModel>} root with one {@code <topic>}
 * element per topic (carrying its id, alpha and total token count), each
 * holding {@code <word>} elements numbered from rank 1 in the iteration
 * order of the sets returned by {@code getSortedWords()}.
 *
 * @param out      the writer that receives the report
 * @param numWords the maximum number of words written for each topic
 */
public void topicXMLReport (PrintWriter out, int numWords) {
  ArrayList<TreeSet<IDSorter>> wordsByTopic = getSortedWords();

  out.println("<?xml version='1.0' ?>");
  out.println("<topicModel>");

  for (int topic = 0; topic < numTopics; topic++) {
    String openingTag = "  <topic id='" + topic
        + "' alpha='" + alpha[topic]
        + "' totalTokens='" + tokensPerTopic[topic] + "'>";
    out.println(openingTag);

    int rank = 1;
    for (IDSorter entry : wordsByTopic.get(topic)) {
      // Stop once the requested number of words has been written.
      if (rank > numWords) {
        break;
      }
      Object wordText = alphabet.lookupObject(entry.getID());
      out.println("	<word rank='" + rank + "'>" + wordText + "</word>");
      rank++;
    }

    out.println("  </topic>");
  }

  out.println("</topicModel>");
}

for (int position=0; position < limit; position++) {
  IDSorter info = iterator.next();
  double probability = info.getWeight() / tokensPerTopic[topic];
  cumulativeProbability += probability;
  formatter.format("<word rank='%d' count='%.0f' prob='%.5f' cumulative='%.5f' docs='%d'", position+1, info.getWeight(), probability, cumulativeProbability, matrix[position][position]);

/**
 * Returns every word type paired with its count for one topic, sorted.
 *
 * One {@link IDSorter} is created per type from
 * {@code typeTopicCounts[type].get(topic)}, and the array is then ordered
 * with {@code Arrays.sort}, i.e. by IDSorter's natural ordering.
 *
 * @param topic the topic whose counts are looked up
 * @return a newly allocated, sorted array of {@code numTypes} entries
 */
public IDSorter[] getSortedTopicWords(int topic) {
  IDSorter[] ranked = new IDSorter[numTypes];

  int type = 0;
  while (type < numTypes) {
    ranked[type] = new IDSorter(type, typeTopicCounts[type].get(topic));
    type++;
  }

  Arrays.sort(ranked);
  return ranked;
}

/**
 * Writes an XML listing of every topic and its leading words with counts.
 *
 * The document has a {@code <topicModel>} root with one {@code <topic>}
 * element per topic (carrying its id, alpha and total token count), each
 * holding {@code <word>} elements that record a rank starting at 1 and the
 * entry's weight as {@code count}, in the iteration order of the sets
 * returned by {@code getSortedWords()}.
 *
 * @param out      the writer that receives the report
 * @param numWords the maximum number of words written for each topic
 */
public void topicXMLReport (PrintWriter out, int numWords) {
  ArrayList<TreeSet<IDSorter>> wordsByTopic = getSortedWords();

  out.println("<?xml version='1.0' ?>");
  out.println("<topicModel>");

  for (int topic = 0; topic < numTopics; topic++) {
    String openingTag = "  <topic id='" + topic
        + "' alpha='" + alpha[topic]
        + "' totalTokens='" + tokensPerTopic[topic] + "'>";
    out.println(openingTag);

    int position = 1;
    for (IDSorter entry : wordsByTopic.get(topic)) {
      // Stop once the requested number of words has been written.
      if (position > numWords) {
        break;
      }
      Object wordText = alphabet.lookupObject(entry.getID());
      out.println("	<word rank='" + position + "' count='" + entry.getWeight() + "'>"
          + wordText + "</word>");
      position++;
    }

    out.println("  </topic>");
  }

  out.println("</topicModel>");
}

/**
 * Prints the words whose weight vectors are most similar to a target vector.
 *
 * Similarity is the cosine between {@code targetVector} and each word's row
 * of {@code weights} (row {@code word} starts at index {@code word * stride}
 * and spans {@code numColumns} entries). The squared length of the target is
 * printed first, followed by up to ten lines of
 * {@code similarity <tab> word id <tab> vocabulary entry}.
 *
 * NOTE(review): an all-zero target or word vector makes its normalizer
 * infinite and the resulting similarity NaN; that case is not handled here.
 *
 * @param targetVector the query vector; its first {@code numColumns} entries are read
 */
public void findClosest(double[] targetVector) {
  IDSorter[] sortedWords = new IDSorter[numWords];

  double targetSquaredSum = 0.0;
  for (int col = 0; col < numColumns; col++) {
    targetSquaredSum += targetVector[col] * targetVector[col];
  }
  double targetNormalizer = 1.0 / Math.sqrt(targetSquaredSum);
  System.out.println(targetSquaredSum);

  for (int word = 0; word < numWords; word++) {
    int rowStart = word * stride;

    // First pass: squared length of this word's vector.
    double wordSquaredSum = 0.0;
    for (int col = 0; col < numColumns; col++) {
      wordSquaredSum += weights[rowStart + col] * weights[rowStart + col];
    }
    double wordNormalizer = 1.0 / Math.sqrt(wordSquaredSum);

    // Second pass: inner product of the two length-normalized vectors.
    double innerProduct = 0.0;
    for (int col = 0; col < numColumns; col++) {
      innerProduct += targetNormalizer * targetVector[col] * wordNormalizer * weights[rowStart + col];
    }
    sortedWords[word] = new IDSorter(word, innerProduct);
  }
  Arrays.sort(sortedWords);

  // Report at most ten entries; the vocabulary may hold fewer than ten words.
  int limit = Math.min(10, numWords);
  for (int i = 0; i < limit; i++) {
    System.out.format("%f\t%d\t%s\n", sortedWords[i].getWeight(), sortedWords[i].getID(), vocabulary.lookupObject(sortedWords[i].getID()));
  }
}

/**
 * Builds a plain-text report with one line per topic.
 *
 * Each line holds the topic index, a tab, {@code tokensPerTopic[topic]}, a
 * tab, and then the leading words of that topic separated by spaces. Words
 * are ordered by sorting {@link IDSorter} entries built from
 * {@code typeTopicCounts[type][topic]} (presumably highest count first;
 * confirm against {@code IDSorter.compareTo}).
 *
 * @param numWords the maximum number of words to list per topic; if it
 *                 exceeds {@code numTypes}, only {@code numTypes} words are listed
 * @return the report, with each topic's line terminated by a newline
 */
public String topWords (int numWords) {
  StringBuilder output = new StringBuilder();
  IDSorter[] sortedWords = new IDSorter[numTypes];

  // sortedWords has exactly numTypes entries; never index past its end.
  int limit = Math.min(numWords, numTypes);

  for (int topic = 0; topic < numTopics; topic++) {
    // The array is refilled and re-sorted for every topic.
    for (int type = 0; type < numTypes; type++) {
      sortedWords[type] = new IDSorter(type, typeTopicCounts[type][topic]);
    }
    Arrays.sort(sortedWords);

    output.append(topic).append("\t").append(tokensPerTopic[topic]).append("\t");
    for (int i = 0; i < limit; i++) {
      output.append(alphabet.lookupObject(sortedWords[i].getID())).append(" ");
    }
    output.append("\n");
  }
  return output.toString();
}

/**
 * Collects the leading words of every topic.
 *
 * The outer array has one element per topic; each inner array lists that
 * topic's words in the iteration order of the corresponding set from
 * {@code getSortedWords()} (most probable first). Entries are returned as
 * Objects, but will probably be Strings.
 *
 * @param numWords The maximum length of each topic's array of words (may be less).
 * @return one array of words per topic
 */
public Object[][] getTopWords(int numWords) {
  ArrayList<TreeSet<IDSorter>> wordsByTopic = getSortedWords();
  Object[][] topWords = new Object[numTopics][];

  for (int topic = 0; topic < numTopics; topic++) {
    TreeSet<IDSorter> ranked = wordsByTopic.get(topic);

    // A topic may hold fewer words than requested, so cap the array
    // length at the number of words actually available.
    int count = Math.min(numWords, ranked.size());
    Object[] words = new Object[count];

    int filled = 0;
    for (IDSorter entry : ranked) {
      if (filled == count) {
        break;
      }
      words[filled] = alphabet.lookupObject(entry.getID());
      filled++;
    }
    topWords[topic] = words;
  }

  return topWords;
}

for (int position=0; position < limit; position++) {
  IDSorter info = iterator.next();
  double probability = info.getWeight() / tokensPerTopic[topic];
  cumulativeProbability += probability;
  formatter.format("<word rank='%d' count='%.0f' prob='%.5f' cumulative='%.5f' docs='%d'", position+1, info.getWeight(), probability, cumulativeProbability, matrix[position][position]);

/**
 * Returns every word type paired with its count for one topic, sorted.
 *
 * One {@link IDSorter} is created per type from
 * {@code typeTopicCounts[type].get(topic)}, and the array is then ordered
 * with {@code Arrays.sort}, i.e. by IDSorter's natural ordering.
 *
 * @param topic the topic whose counts are looked up
 * @return a newly allocated, sorted array of {@code numTypes} entries
 */
public IDSorter[] getSortedTopicWords(int topic) {
  IDSorter[] ranked = new IDSorter[numTypes];

  int type = 0;
  while (type < numTypes) {
    ranked[type] = new IDSorter(type, typeTopicCounts[type].get(topic));
    type++;
  }

  Arrays.sort(ranked);
  return ranked;
}

for (int topic = 0; topic < numTopics; topic++) {
  sortedTopics[topic] = new IDSorter(topic, topic);
    sortedTopics[topic].set(topic, (float) topicCounts[topic] / totalLength);
    if (sortedTopics[i].getWeight() < threshold) { break; }
    pw.print (sortedTopics[i].getID() + " " + 
         sortedTopics[i].getWeight() + " ");

/**
 * Writes an XML listing of every topic and its leading words with counts.
 *
 * The document has a {@code <topicModel>} root with one {@code <topic>}
 * element per topic (carrying its id, alpha and total token count), each
 * holding {@code <word>} elements that record a rank starting at 1 and the
 * entry's weight as {@code count}, in the iteration order of the sets
 * returned by {@code getSortedWords()}.
 *
 * @param out      the writer that receives the report
 * @param numWords the maximum number of words written for each topic
 */
public void topicXMLReport (PrintWriter out, int numWords) {
  ArrayList<TreeSet<IDSorter>> wordsByTopic = getSortedWords();

  out.println("<?xml version='1.0' ?>");
  out.println("<topicModel>");

  for (int topic = 0; topic < numTopics; topic++) {
    String openingTag = "  <topic id='" + topic
        + "' alpha='" + alpha[topic]
        + "' totalTokens='" + tokensPerTopic[topic] + "'>";
    out.println(openingTag);

    int position = 1;
    for (IDSorter entry : wordsByTopic.get(topic)) {
      // Stop once the requested number of words has been written.
      if (position > numWords) {
        break;
      }
      Object wordText = alphabet.lookupObject(entry.getID());
      out.println("	<word rank='" + position + "' count='" + entry.getWeight() + "'>"
          + wordText + "</word>");
      position++;
    }

    out.println("  </topic>");
  }

  out.println("</topicModel>");
}

/**
 * Returns the leading words of the data alphabet as one space-separated string.
 *
 * Every type is paired with {@code typeCounts[type]} in an {@link IDSorter},
 * the pairs are sorted with {@code Arrays.sort}, and the first entries are
 * looked up in {@code instances.getDataAlphabet()}. Each word is followed by
 * a single space, including the last one.
 *
 * @param numWords   the maximum number of words to return; if it exceeds
 *                   {@code numTypes}, only {@code numTypes} words are returned
 * @param withWeight when true, each word is written as {@code word:weight}
 * @return the formatted word list
 */
public String getTopWords(int numWords, boolean withWeight) {
  IDSorter[] sortedTypes = new IDSorter[numTypes];

  for (int type=0; type < numTypes; type++) {
    sortedTypes[type] = new IDSorter(type, typeCounts[type]);
  }
  Arrays.sort(sortedTypes);

  Alphabet alphabet = instances.getDataAlphabet();
  // The buffer never leaves this method, so the unsynchronized builder suffices.
  StringBuilder out = new StringBuilder();

  // sortedTypes has exactly numTypes entries; never index past its end.
  int limit = Math.min(numWords, numTypes);
  for (int i = 0; i < limit; i++) {
    out.append(alphabet.lookupObject(sortedTypes[i].getID()));
    if (withWeight) {
      out.append(":").append(sortedTypes[i].getWeight());
    }
    out.append(" ");
  }
  return out.toString();
}

/**
 * Builds a plain-text report with one line per topic.
 *
 * Each line holds the topic index, a tab, {@code tokensPerTopic[topic]}, a
 * tab, and then the leading words of that topic separated by spaces. Words
 * are ordered by sorting {@link IDSorter} entries built from
 * {@code typeTopicCounts[type][topic]} (presumably highest count first;
 * confirm against {@code IDSorter.compareTo}).
 *
 * @param numWords the maximum number of words to list per topic; if it
 *                 exceeds {@code numTypes}, only {@code numTypes} words are listed
 * @return the report, with each topic's line terminated by a newline
 */
public String topWords (int numWords) {
  StringBuilder output = new StringBuilder();
  IDSorter[] sortedWords = new IDSorter[numTypes];

  // sortedWords has exactly numTypes entries; never index past its end.
  int limit = Math.min(numWords, numTypes);

  for (int topic = 0; topic < numTopics; topic++) {
    // The array is refilled and re-sorted for every topic.
    for (int type = 0; type < numTypes; type++) {
      sortedWords[type] = new IDSorter(type, typeTopicCounts[type][topic]);
    }
    Arrays.sort(sortedWords);

    output.append(topic).append("\t").append(tokensPerTopic[topic]).append("\t");
    for (int i = 0; i < limit; i++) {
      output.append(alphabet.lookupObject(sortedWords[i].getID())).append(" ");
    }
    output.append("\n");
  }
  return output.toString();
}

/**
 * Collects the leading words of every topic.
 *
 * The outer array has one element per topic; each inner array lists that
 * topic's words in the iteration order of the corresponding set from
 * {@code getSortedWords()} (most probable first). Entries are returned as
 * Objects, but will probably be Strings.
 *
 * @param numWords The maximum length of each topic's array of words (may be less).
 * @return one array of words per topic
 */
public Object[][] getTopWords(int numWords) {
  ArrayList<TreeSet<IDSorter>> wordsByTopic = getSortedWords();
  Object[][] topWords = new Object[numTopics][];

  for (int topic = 0; topic < numTopics; topic++) {
    TreeSet<IDSorter> ranked = wordsByTopic.get(topic);

    // A topic may hold fewer words than requested, so cap the array
    // length at the number of words actually available.
    int count = Math.min(numWords, ranked.size());
    Object[] words = new Object[count];

    int filled = 0;
    for (IDSorter entry : ranked) {
      if (filled == count) {
        break;
      }
      words[filled] = alphabet.lookupObject(entry.getID());
      filled++;
    }
    topWords[topic] = words;
  }

  return topWords;
}

for (int position=0; position < limit; position++) {
  IDSorter info = iterator.next();
  double probability = info.getWeight() / tokensPerTopic[topic];
  cumulativeProbability += probability;
  formatter.format("<word rank='%d' count='%.0f' prob='%.5f' cumulative='%.5f' docs='%d'", position+1, info.getWeight(), probability, cumulativeProbability, matrix[position][position]);

/**
 * Returns every word type paired with its count for one topic, sorted.
 *
 * One {@link IDSorter} is created per type from
 * {@code typeTopicCounts[type].get(topic)}, and the array is then ordered
 * with {@code Arrays.sort}, i.e. by IDSorter's natural ordering.
 *
 * @param topic the topic whose counts are looked up
 * @return a newly allocated, sorted array of {@code numTypes} entries
 */
public IDSorter[] getSortedTopicWords(int topic) {
  IDSorter[] ranked = new IDSorter[numTypes];

  int type = 0;
  while (type < numTypes) {
    ranked[type] = new IDSorter(type, typeTopicCounts[type].get(topic));
    type++;
  }

  Arrays.sort(ranked);
  return ranked;
}

Javadoc

This class contains a comparator for use in sorting integers that have associated floating point values. One example would be sorting words by probability in a Naive Bayes model. Ties are broken by the ID.

 
// Pair every index with its weight, then sort with IDSorter's ordering.
IDSorter[] sortedIDs = new IDSorter[n];
for (int i=0; i<n; i++) {
  sortedIDs[i] = new IDSorter(i, weights[i]);
}
Arrays.sort(sortedIDs);
// Visit at most the first ten entries; n may be smaller than 10.
for (int i=0; i<Math.min(10, n); i++) {
  // do something with the ith highest weighted item
}

Most used methods

getID
getWeight
<init>
set
Reinitialize an IDSorter

Popular in Java

Creating JSON documents from java classes using gson
getSystemService (Context)
getApplicationContext (Context)
getResourceAsStream (ClassLoader)
EOFException (java.io)
Thrown when a program encounters the end of a file or stream during an input operation.
SocketException (java.net)
This SocketException may be thrown during socket creation or setting options, and is the superclass of all other socket related exceptions.
LoggerFactory (org.slf4j)
The LoggerFactory is a utility class producing Loggers for various logging APIs, most notably for log4j, logback and JDK 1.4 logging.
Point (java.awt)
A point representing a location in (x,y) coordinate space, specified in integer precision.
Notification (javax.management)
BoxLayout (javax.swing)
Best IntelliJ plugins

How to use IDSorter in cc.mallet.types

Best Java code snippets using cc.mallet.types.IDSorter (Showing top 20 results out of 315)

How to use
IDSorter
in
cc.mallet.types