/**
 * Command-line entry point for training an LDA model with hyperparameter
 * optimization.
 *
 * <p>Usage: {@code <training instances file> [numTopics] [testing instances file]}
 * <ul>
 *   <li>args[0] — serialized {@code InstanceList} of training documents (required)</li>
 *   <li>args[1] — number of topics (optional, default 200)</li>
 *   <li>args[2] — serialized {@code InstanceList} of held-out documents (optional)</li>
 * </ul>
 *
 * @throws IOException if an instance list cannot be read from disk
 */
public static void main (String[] args) throws IOException {
	// Fail with a usage message rather than an opaque
	// ArrayIndexOutOfBoundsException when no training file is given.
	if (args.length < 1) {
		System.err.println ("Usage: LDAHyper <training instances> [numTopics] [testing instances]");
		System.exit (1);
	}
	InstanceList training = InstanceList.load (new File(args[0]));
	int numTopics = args.length > 1 ? Integer.parseInt(args[1]) : 200;
	// NOTE(review): `testing` is loaded but never used in this method —
	// presumably intended for held-out evaluation; confirm or remove.
	InstanceList testing = args.length > 2 ? InstanceList.load (new File(args[2])) : null;
	// 50.0 = total alpha (summed over topics), 0.01 = per-type beta.
	LDAHyper lda = new LDAHyper (numTopics, 50.0, 0.01);
	lda.printLogLikelihood = true;
	// Show top 7 words per topic every 50 iterations.
	lda.setTopicDisplay (50, 7);
	lda.addInstances (training);
	lda.estimate ();
}
// NOTE(review): this line is an out-of-context fragment — several statements
// from the interior of the Gibbs-sampling estimate loop (per-iteration
// diagnostics, periodic state dump, cached-coefficient update, and the
// per-document sampling call) collapsed onto one line. It references
// variables that are not defined here (wordsPerTopic, testing, topic, di,
// stateFilename, iterationsSoFar, burninPeriod, saveSampleInterval, ...),
// so it cannot compile standalone. TODO: restore the enclosing method body
// (loop structure and braces) before making any behavioral edit.
printTopWords (System.out, wordsPerTopic, false); double el = empiricalLikelihood(1000, testing); double ll = modelLogLikelihood(); double mi = topicLabelMutualInformation(); System.out.println(ll + "\t" + el + "\t" + mi); this.printState(new File(stateFilename + '.' + iterationsSoFar)); cachedCoefficients[topic] = alpha[topic] / (tokensPerTopic[topic] + betaSum); clearHistograms(); FeatureSequence tokenSequence = (FeatureSequence) data.get(di).instance.getData(); LabelSequence topicSequence = (LabelSequence) data.get(di).topicSequence; sampleTopicsForOneDoc (tokenSequence, topicSequence, iterationsSoFar >= burninPeriod && iterationsSoFar % saveSampleInterval == 0, true); if (printLogLikelihood) System.out.println (modelLogLikelihood());
// NOTE(review): truncated fragment of topicXMLReportPhrases(PrintStream, int)
// — the method is cut off mid-body (unbalanced braces) and references locals
// that are never declared in the visible text (doclen, feature, topic,
// prevtopic, sb, withBigrams, probs, type, ti, pout, titles). It appears to
// scan each document's token/topic sequences to collect multi-word phrases
// per topic and emit an XML report of top terms — TODO confirm against the
// complete original before editing; do not attempt to compile as-is.
public void topicXMLReportPhrases (PrintStream out, int numWords) { int numTopics = this.getNumTopics(); gnu.trove.TObjectIntHashMap<String>[] phrases = new gnu.trove.TObjectIntHashMap[numTopics]; Alphabet alphabet = this.getAlphabet(); for (int di = 0; di < this.getData().size(); di++) { LDAHyper.Topication t = this.getData().get(di); Instance instance = t.instance; FeatureSequence fvs = (FeatureSequence) instance.getData(); for (int pi = 0; pi < doclen; pi++) { feature = fvs.getIndexAtPosition(pi); topic = this.getData().get(di).topicSequence.getIndexAtPosition(pi); if (topic == prevtopic && (!withBigrams || ((FeatureSequenceWithBigrams)fvs).getBiIndexAtPosition(pi) != -1)) { if (sb == null) probs[type] = this.getCountFeatureTopic(type, ti) / (double)this.getCountTokensPerTopic(ti); RankedFeatureVector rfv = new RankedFeatureVector (alphabet, probs); for (int ri = 0; ri < numWords; ri++) { int fi = rfv.getIndexAtRank(ri); pout.println (" <term weight=\""+probs[fi]+"\" count=\""+this.getCountFeatureTopic(fi,ti)+"\">"+alphabet.lookupObject(fi)+ "</term>"); if (ri < 20) // consider top 20 individual words as candidate titles titles.add(alphabet.lookupObject(fi), this.getCountFeatureTopic(fi,ti));
public void addInstances (InstanceList training) { initializeForTypes (training.getDataAlphabet()); ArrayList<LabelSequence> topicSequences = new ArrayList<LabelSequence>(); for (Instance instance : training) { LabelSequence topicSequence = new LabelSequence(topicAlphabet, new int[instanceLength(instance)]); if (false) // This method not yet obeying its last "false" argument, and must be for this to work sampleTopicsForOneDoc((FeatureSequence)instance.getData(), topicSequence, false, false); else { Randoms r = new Randoms(); int[] topics = topicSequence.getFeatures(); for (int i = 0; i < topics.length; i++) topics[i] = r.nextInt(numTopics); } topicSequences.add (topicSequence); } addInstances (training, topicSequences); }
/**
 * Runs model estimation for the default number of iterations
 * ({@code numIterations}); convenience overload of {@code estimate(int)}.
 *
 * @throws IOException if propagated from {@code estimate(int)}
 */
public void estimate () throws IOException { estimate (numIterations); }
// NOTE(review): this line is an out-of-context fragment — several statements
// from the interior of the Gibbs-sampling estimate loop (per-iteration
// diagnostics, periodic state dump, cached-coefficient update, and the
// per-document sampling call) collapsed onto one line. It references
// variables that are not defined here (wordsPerTopic, testing, topic, di,
// stateFilename, iterationsSoFar, burninPeriod, saveSampleInterval, ...),
// so it cannot compile standalone. TODO: restore the enclosing method body
// (loop structure and braces) before making any behavioral edit.
printTopWords (System.out, wordsPerTopic, false); double el = empiricalLikelihood(1000, testing); double ll = modelLogLikelihood(); double mi = topicLabelMutualInformation(); System.out.println(ll + "\t" + el + "\t" + mi); this.printState(new File(stateFilename + '.' + iterationsSoFar)); cachedCoefficients[topic] = alpha[topic] / (tokensPerTopic[topic] + betaSum); clearHistograms(); FeatureSequence tokenSequence = (FeatureSequence) data.get(di).instance.getData(); LabelSequence topicSequence = (LabelSequence) data.get(di).topicSequence; sampleTopicsForOneDoc (tokenSequence, topicSequence, iterationsSoFar >= burninPeriod && iterationsSoFar % saveSampleInterval == 0, true); if (printLogLikelihood) System.out.println (modelLogLikelihood());
// NOTE(review): truncated fragment of topicXMLReportPhrases(PrintStream, int)
// — the method is cut off mid-body (unbalanced braces) and references locals
// that are never declared in the visible text (doclen, feature, topic,
// prevtopic, sb, withBigrams, probs, type, ti, pout, titles). It appears to
// scan each document's token/topic sequences to collect multi-word phrases
// per topic and emit an XML report of top terms — TODO confirm against the
// complete original before editing; do not attempt to compile as-is.
public void topicXMLReportPhrases (PrintStream out, int numWords) { int numTopics = this.getNumTopics(); gnu.trove.TObjectIntHashMap<String>[] phrases = new gnu.trove.TObjectIntHashMap[numTopics]; Alphabet alphabet = this.getAlphabet(); for (int di = 0; di < this.getData().size(); di++) { LDAHyper.Topication t = this.getData().get(di); Instance instance = t.instance; FeatureSequence fvs = (FeatureSequence) instance.getData(); for (int pi = 0; pi < doclen; pi++) { feature = fvs.getIndexAtPosition(pi); topic = this.getData().get(di).topicSequence.getIndexAtPosition(pi); if (topic == prevtopic && (!withBigrams || ((FeatureSequenceWithBigrams)fvs).getBiIndexAtPosition(pi) != -1)) { if (sb == null) probs[type] = this.getCountFeatureTopic(type, ti) / (double)this.getCountTokensPerTopic(ti); RankedFeatureVector rfv = new RankedFeatureVector (alphabet, probs); for (int ri = 0; ri < numWords; ri++) { int fi = rfv.getIndexAtRank(ri); pout.println (" <term weight=\""+probs[fi]+"\" count=\""+this.getCountFeatureTopic(fi,ti)+"\">"+alphabet.lookupObject(fi)+ "</term>"); if (ri < 20) // consider top 20 individual words as candidate titles titles.add(alphabet.lookupObject(fi), this.getCountFeatureTopic(fi,ti));
public void addInstances (InstanceList training) { initializeForTypes (training.getDataAlphabet()); ArrayList<LabelSequence> topicSequences = new ArrayList<LabelSequence>(); for (Instance instance : training) { LabelSequence topicSequence = new LabelSequence(topicAlphabet, new int[instanceLength(instance)]); if (false) // This method not yet obeying its last "false" argument, and must be for this to work sampleTopicsForOneDoc((FeatureSequence)instance.getData(), topicSequence, false, false); else { Randoms r = new Randoms(); int[] topics = topicSequence.getFeatures(); for (int i = 0; i < topics.length; i++) topics[i] = r.nextInt(numTopics); } topicSequences.add (topicSequence); } addInstances (training, topicSequences); }
/**
 * Runs model estimation for the default number of iterations
 * ({@code numIterations}); convenience overload of {@code estimate(int)}.
 *
 * @throws IOException if propagated from {@code estimate(int)}
 */
public void estimate () throws IOException { estimate (numIterations); }
// NOTE(review): this line is an out-of-context fragment — several statements
// from the interior of the Gibbs-sampling estimate loop (per-iteration
// diagnostics, periodic state dump, cached-coefficient update, and the
// per-document sampling call) collapsed onto one line. It references
// variables that are not defined here (wordsPerTopic, testing, topic, di,
// stateFilename, iterationsSoFar, burninPeriod, saveSampleInterval, ...),
// so it cannot compile standalone. TODO: restore the enclosing method body
// (loop structure and braces) before making any behavioral edit.
printTopWords (System.out, wordsPerTopic, false); double el = empiricalLikelihood(1000, testing); double ll = modelLogLikelihood(); double mi = topicLabelMutualInformation(); System.out.println(ll + "\t" + el + "\t" + mi); this.printState(new File(stateFilename + '.' + iterationsSoFar)); cachedCoefficients[topic] = alpha[topic] / (tokensPerTopic[topic] + betaSum); clearHistograms(); FeatureSequence tokenSequence = (FeatureSequence) data.get(di).instance.getData(); LabelSequence topicSequence = (LabelSequence) data.get(di).topicSequence; sampleTopicsForOneDoc (tokenSequence, topicSequence, iterationsSoFar >= burninPeriod && iterationsSoFar % saveSampleInterval == 0, true); if (printLogLikelihood) System.out.println (modelLogLikelihood());
// NOTE(review): truncated fragment of topicXMLReportPhrases(PrintStream, int)
// — the method is cut off mid-body (unbalanced braces) and references locals
// that are never declared in the visible text (doclen, feature, topic,
// prevtopic, sb, withBigrams, probs, type, ti, pout, titles). It appears to
// scan each document's token/topic sequences to collect multi-word phrases
// per topic and emit an XML report of top terms — TODO confirm against the
// complete original before editing; do not attempt to compile as-is.
public void topicXMLReportPhrases (PrintStream out, int numWords) { int numTopics = this.getNumTopics(); gnu.trove.TObjectIntHashMap<String>[] phrases = new gnu.trove.TObjectIntHashMap[numTopics]; Alphabet alphabet = this.getAlphabet(); for (int di = 0; di < this.getData().size(); di++) { LDAHyper.Topication t = this.getData().get(di); Instance instance = t.instance; FeatureSequence fvs = (FeatureSequence) instance.getData(); for (int pi = 0; pi < doclen; pi++) { feature = fvs.getIndexAtPosition(pi); topic = this.getData().get(di).topicSequence.getIndexAtPosition(pi); if (topic == prevtopic && (!withBigrams || ((FeatureSequenceWithBigrams)fvs).getBiIndexAtPosition(pi) != -1)) { if (sb == null) probs[type] = this.getCountFeatureTopic(type, ti) / (double)this.getCountTokensPerTopic(ti); RankedFeatureVector rfv = new RankedFeatureVector (alphabet, probs); for (int ri = 0; ri < numWords; ri++) { int fi = rfv.getIndexAtRank(ri); pout.println (" <term weight=\""+probs[fi]+"\" count=\""+this.getCountFeatureTopic(fi,ti)+"\">"+alphabet.lookupObject(fi)+ "</term>"); if (ri < 20) // consider top 20 individual words as candidate titles titles.add(alphabet.lookupObject(fi), this.getCountFeatureTopic(fi,ti));
/**
 * Command-line entry point for training an LDA model with hyperparameter
 * optimization.
 *
 * <p>Usage: {@code <training instances file> [numTopics] [testing instances file]}
 * <ul>
 *   <li>args[0] — serialized {@code InstanceList} of training documents (required)</li>
 *   <li>args[1] — number of topics (optional, default 200)</li>
 *   <li>args[2] — serialized {@code InstanceList} of held-out documents (optional)</li>
 * </ul>
 *
 * @throws IOException if an instance list cannot be read from disk
 */
public static void main (String[] args) throws IOException {
	// Fail with a usage message rather than an opaque
	// ArrayIndexOutOfBoundsException when no training file is given.
	if (args.length < 1) {
		System.err.println ("Usage: LDAHyper <training instances> [numTopics] [testing instances]");
		System.exit (1);
	}
	InstanceList training = InstanceList.load (new File(args[0]));
	int numTopics = args.length > 1 ? Integer.parseInt(args[1]) : 200;
	// NOTE(review): `testing` is loaded but never used in this method —
	// presumably intended for held-out evaluation; confirm or remove.
	InstanceList testing = args.length > 2 ? InstanceList.load (new File(args[2])) : null;
	// 50.0 = total alpha (summed over topics), 0.01 = per-type beta.
	LDAHyper lda = new LDAHyper (numTopics, 50.0, 0.01);
	lda.printLogLikelihood = true;
	// Show top 7 words per topic every 50 iterations.
	lda.setTopicDisplay (50, 7);
	lda.addInstances (training);
	lda.estimate ();
}
public void addInstances (InstanceList training) { initializeForTypes (training.getDataAlphabet()); ArrayList<LabelSequence> topicSequences = new ArrayList<LabelSequence>(); for (Instance instance : training) { LabelSequence topicSequence = new LabelSequence(topicAlphabet, new int[instanceLength(instance)]); if (false) // This method not yet obeying its last "false" argument, and must be for this to work sampleTopicsForOneDoc((FeatureSequence)instance.getData(), topicSequence, false, false); else { Randoms r = new Randoms(); int[] topics = topicSequence.getFeatures(); for (int i = 0; i < topics.length; i++) topics[i] = r.nextInt(numTopics); } topicSequences.add (topicSequence); } addInstances (training, topicSequences); }
/**
 * Runs model estimation for the default number of iterations
 * ({@code numIterations}); convenience overload of {@code estimate(int)}.
 *
 * @throws IOException if propagated from {@code estimate(int)}
 */
public void estimate () throws IOException { estimate (numIterations); }
/**
 * Command-line entry point for training an LDA model with hyperparameter
 * optimization.
 *
 * <p>Usage: {@code <training instances file> [numTopics] [testing instances file]}
 * <ul>
 *   <li>args[0] — serialized {@code InstanceList} of training documents (required)</li>
 *   <li>args[1] — number of topics (optional, default 200)</li>
 *   <li>args[2] — serialized {@code InstanceList} of held-out documents (optional)</li>
 * </ul>
 *
 * @throws IOException if an instance list cannot be read from disk
 */
public static void main (String[] args) throws IOException {
	// Fail with a usage message rather than an opaque
	// ArrayIndexOutOfBoundsException when no training file is given.
	if (args.length < 1) {
		System.err.println ("Usage: LDAHyper <training instances> [numTopics] [testing instances]");
		System.exit (1);
	}
	InstanceList training = InstanceList.load (new File(args[0]));
	int numTopics = args.length > 1 ? Integer.parseInt(args[1]) : 200;
	// NOTE(review): `testing` is loaded but never used in this method —
	// presumably intended for held-out evaluation; confirm or remove.
	InstanceList testing = args.length > 2 ? InstanceList.load (new File(args[2])) : null;
	// 50.0 = total alpha (summed over topics), 0.01 = per-type beta.
	LDAHyper lda = new LDAHyper (numTopics, 50.0, 0.01);
	lda.printLogLikelihood = true;
	// Show top 7 words per topic every 50 iterations.
	lda.setTopicDisplay (50, 7);
	lda.addInstances (training);
	lda.estimate ();
}