public static void main (String[] args) throws IOException { InstanceList ilist = InstanceList.load (new File(args[0])); int numIterations = args.length > 1 ? Integer.parseInt(args[1]) : 1000; int numTopWords = args.length > 2 ? Integer.parseInt(args[2]) : 20; System.out.println ("Data loaded."); LDA lda = new LDA (10); lda.estimate (ilist, numIterations, 50, 0, null, new Randoms()); // should be 1100 lda.printTopWords (numTopWords, true); lda.printDocumentTopics (new File(args[0]+".lda")); }
/**
 * Writes the model state to the given file by delegating to
 * {@code printState(PrintWriter)}.
 *
 * @param f the file to write to
 * @throws IOException if the file cannot be opened for writing
 */
public void printState (File f) throws IOException
{
	PrintWriter writer = new PrintWriter (new FileWriter(f));
	try {
		printState (writer);
	} finally {
		// Always release the file handle, even if printing throws.
		writer.close();
	}
}
/**
 * Runs {@code numIterations} sampling passes over a range of documents,
 * printing progress to standard output and the total elapsed time at the end.
 *
 * <p>On each iteration: the iteration number is printed when it is a multiple
 * of 10, otherwise a dot; then, for iterations greater than zero, top words are
 * printed when {@code showTopicsInterval} is non-zero and divides the iteration
 * number, and the model is written to {@code outputModelFilename + '.' + iteration}
 * when {@code outputModelInterval} is non-zero and divides the iteration number;
 * finally {@code sampleTopicsForDocs} is called for the document range.
 *
 * @param docIndexStart        index of the first document, passed to {@code sampleTopicsForDocs}
 * @param docIndexLength       number of documents, passed to {@code sampleTopicsForDocs}
 * @param numIterations        number of sampling passes to run
 * @param showTopicsInterval   print top words every this many iterations; 0 disables
 * @param outputModelInterval  write the model every this many iterations; 0 disables
 * @param outputModelFilename  prefix of the model file names
 * @param r                    random source handed to the sampler
 */
public void estimate (int docIndexStart, int docIndexLength,
                      int numIterations, int showTopicsInterval,
                      int outputModelInterval, String outputModelFilename,
                      Randoms r)
{
	final long startTime = System.currentTimeMillis();

	int iterations = 0;
	while (iterations < numIterations) {
		// Progress indicator: a number every tenth pass, a dot otherwise.
		System.out.print (iterations % 10 == 0 ? String.valueOf (iterations) : ".");
		System.out.flush();

		// Periodic actions are skipped on the very first pass.
		final boolean pastFirst = iterations > 0;
		if (pastFirst && showTopicsInterval != 0 && iterations % showTopicsInterval == 0) {
			System.out.println ();
			printTopWords (5, false);
		}
		if (pastFirst && outputModelInterval != 0 && iterations % outputModelInterval == 0) {
			this.write (new File(outputModelFilename+'.'+iterations));
		}

		sampleTopicsForDocs(docIndexStart, docIndexLength, r);
		iterations++;
	}

	// Break the elapsed wall-clock time (rounded to whole seconds) into d/h/m/s.
	final long elapsedMillis = System.currentTimeMillis() - startTime;
	final long totalSeconds = Math.round (elapsedMillis / 1000.0);
	final long days = totalSeconds / (60 * 60 * 24);
	final long hours = (totalSeconds / (60 * 60)) % 24;
	final long minutes = (totalSeconds / 60) % 60;
	final long seconds = totalSeconds % 60;

	// Zero-valued leading units are omitted; seconds are always shown.
	StringBuilder report = new StringBuilder ("\nTotal time: ");
	if (days != 0) {
		report.append (days).append (" days ");
	}
	if (hours != 0) {
		report.append (hours).append (" hours ");
	}
	if (minutes != 0) {
		report.append (minutes).append (" minutes ");
	}
	report.append (seconds).append (" seconds");
	System.out.println (report.toString());
}
public void sampleTopicsForDocs (int start, int length, Randoms r) { assert (start+length <= docTopicCounts.length); double[] topicWeights = new double[numTopics]; // Loop over every word in the corpus for (int di = start; di < start+length; di++) { sampleTopicsForOneDoc ((FeatureSequence)ilist.get(di).getData(), topics[di], docTopicCounts[di], topicWeights, r); } }
/**
 * Convenience overload: forwards to the three-argument
 * {@code printDocumentTopics} with the fixed arguments {@code 0.0} and {@code -1}.
 *
 * @param pw the writer that receives the output
 */
public void printDocumentTopics (PrintWriter pw)
{
	// NOTE(review): presumably a proportion threshold and a maximum count,
	// with these values meaning "no filtering" -- confirm against the
	// three-argument overload.
	final double threshold = 0.0;
	final int max = -1;
	printDocumentTopics (pw, threshold, max);
}
// Delegate to the range-based estimate overload, starting at document index 0 with a range length of numDocs.
this.estimate(0, numDocs, numIterations, showTopicsInterval, outputModelInterval, outputModelFilename, r);
/**
 * Runs {@code numIterations} sampling passes over a range of documents,
 * printing progress to standard output and the total elapsed time at the end.
 *
 * <p>On each iteration: the iteration number is printed when it is a multiple
 * of 10, otherwise a dot; then, for iterations greater than zero, top words are
 * printed when {@code showTopicsInterval} is non-zero and divides the iteration
 * number, and the model is written to {@code outputModelFilename + '.' + iteration}
 * when {@code outputModelInterval} is non-zero and divides the iteration number;
 * finally {@code sampleTopicsForDocs} is called for the document range.
 *
 * @param docIndexStart        index of the first document, passed to {@code sampleTopicsForDocs}
 * @param docIndexLength       number of documents, passed to {@code sampleTopicsForDocs}
 * @param numIterations        number of sampling passes to run
 * @param showTopicsInterval   print top words every this many iterations; 0 disables
 * @param outputModelInterval  write the model every this many iterations; 0 disables
 * @param outputModelFilename  prefix of the model file names
 * @param r                    random source handed to the sampler
 */
public void estimate (int docIndexStart, int docIndexLength,
                      int numIterations, int showTopicsInterval,
                      int outputModelInterval, String outputModelFilename,
                      Randoms r)
{
	final long startTime = System.currentTimeMillis();

	int iterations = 0;
	while (iterations < numIterations) {
		// Progress indicator: a number every tenth pass, a dot otherwise.
		System.out.print (iterations % 10 == 0 ? String.valueOf (iterations) : ".");
		System.out.flush();

		// Periodic actions are skipped on the very first pass.
		final boolean pastFirst = iterations > 0;
		if (pastFirst && showTopicsInterval != 0 && iterations % showTopicsInterval == 0) {
			System.out.println ();
			printTopWords (5, false);
		}
		if (pastFirst && outputModelInterval != 0 && iterations % outputModelInterval == 0) {
			this.write (new File(outputModelFilename+'.'+iterations));
		}

		sampleTopicsForDocs(docIndexStart, docIndexLength, r);
		iterations++;
	}

	// Break the elapsed wall-clock time (rounded to whole seconds) into d/h/m/s.
	final long elapsedMillis = System.currentTimeMillis() - startTime;
	final long totalSeconds = Math.round (elapsedMillis / 1000.0);
	final long days = totalSeconds / (60 * 60 * 24);
	final long hours = (totalSeconds / (60 * 60)) % 24;
	final long minutes = (totalSeconds / 60) % 60;
	final long seconds = totalSeconds % 60;

	// Zero-valued leading units are omitted; seconds are always shown.
	StringBuilder report = new StringBuilder ("\nTotal time: ");
	if (days != 0) {
		report.append (days).append (" days ");
	}
	if (hours != 0) {
		report.append (hours).append (" hours ");
	}
	if (minutes != 0) {
		report.append (minutes).append (" minutes ");
	}
	report.append (seconds).append (" seconds");
	System.out.println (report.toString());
}
public void sampleTopicsForAllDocs (Randoms r) { double[] topicWeights = new double[numTopics]; // Loop over every word in the corpus for (int di = 0; di < topics.length; di++) { sampleTopicsForOneDoc ((FeatureSequence)ilist.get(di).getData(), topics[di], docTopicCounts[di], topicWeights, r); } }
/**
 * Convenience overload: forwards to the three-argument
 * {@code printDocumentTopics} with the fixed arguments {@code 0.0} and {@code -1}.
 *
 * @param pw the writer that receives the output
 */
public void printDocumentTopics (PrintWriter pw)
{
	// NOTE(review): presumably a proportion threshold and a maximum count,
	// with these values meaning "no filtering" -- confirm against the
	// three-argument overload.
	final double threshold = 0.0;
	final int max = -1;
	printDocumentTopics (pw, threshold, max);
}
// Delegate to the range-based estimate overload, starting at document index 0 with a range length of numDocs.
this.estimate(0, numDocs, numIterations, showTopicsInterval, outputModelInterval, outputModelFilename, r);
public static void main (String[] args) throws IOException { InstanceList ilist = InstanceList.load (new File(args[0])); int numIterations = args.length > 1 ? Integer.parseInt(args[1]) : 1000; int numTopWords = args.length > 2 ? Integer.parseInt(args[2]) : 20; System.out.println ("Data loaded."); LDA lda = new LDA (10); lda.estimate (ilist, numIterations, 50, 0, null, new Randoms()); // should be 1100 lda.printTopWords (numTopWords, true); lda.printDocumentTopics (new File(args[0]+".lda")); }
/**
 * Runs {@code numIterations} sampling passes over a range of documents,
 * printing progress to standard output and the total elapsed time at the end.
 *
 * <p>On each iteration: the iteration number is printed when it is a multiple
 * of 10, otherwise a dot; then, for iterations greater than zero, top words are
 * printed when {@code showTopicsInterval} is non-zero and divides the iteration
 * number, and the model is written to {@code outputModelFilename + '.' + iteration}
 * when {@code outputModelInterval} is non-zero and divides the iteration number;
 * finally {@code sampleTopicsForDocs} is called for the document range.
 *
 * @param docIndexStart        index of the first document, passed to {@code sampleTopicsForDocs}
 * @param docIndexLength       number of documents, passed to {@code sampleTopicsForDocs}
 * @param numIterations        number of sampling passes to run
 * @param showTopicsInterval   print top words every this many iterations; 0 disables
 * @param outputModelInterval  write the model every this many iterations; 0 disables
 * @param outputModelFilename  prefix of the model file names
 * @param r                    random source handed to the sampler
 */
public void estimate (int docIndexStart, int docIndexLength,
                      int numIterations, int showTopicsInterval,
                      int outputModelInterval, String outputModelFilename,
                      Randoms r)
{
	final long startTime = System.currentTimeMillis();

	int iterations = 0;
	while (iterations < numIterations) {
		// Progress indicator: a number every tenth pass, a dot otherwise.
		System.out.print (iterations % 10 == 0 ? String.valueOf (iterations) : ".");
		System.out.flush();

		// Periodic actions are skipped on the very first pass.
		final boolean pastFirst = iterations > 0;
		if (pastFirst && showTopicsInterval != 0 && iterations % showTopicsInterval == 0) {
			System.out.println ();
			printTopWords (5, false);
		}
		if (pastFirst && outputModelInterval != 0 && iterations % outputModelInterval == 0) {
			this.write (new File(outputModelFilename+'.'+iterations));
		}

		sampleTopicsForDocs(docIndexStart, docIndexLength, r);
		iterations++;
	}

	// Break the elapsed wall-clock time (rounded to whole seconds) into d/h/m/s.
	final long elapsedMillis = System.currentTimeMillis() - startTime;
	final long totalSeconds = Math.round (elapsedMillis / 1000.0);
	final long days = totalSeconds / (60 * 60 * 24);
	final long hours = (totalSeconds / (60 * 60)) % 24;
	final long minutes = (totalSeconds / 60) % 60;
	final long seconds = totalSeconds % 60;

	// Zero-valued leading units are omitted; seconds are always shown.
	StringBuilder report = new StringBuilder ("\nTotal time: ");
	if (days != 0) {
		report.append (days).append (" days ");
	}
	if (hours != 0) {
		report.append (hours).append (" hours ");
	}
	if (minutes != 0) {
		report.append (minutes).append (" minutes ");
	}
	report.append (seconds).append (" seconds");
	System.out.println (report.toString());
}
public void sampleTopicsForAllDocs (Randoms r) { double[] topicWeights = new double[numTopics]; // Loop over every word in the corpus for (int di = 0; di < topics.length; di++) { sampleTopicsForOneDoc ((FeatureSequence)ilist.get(di).getData(), topics[di], docTopicCounts[di], topicWeights, r); } }
/**
 * Convenience overload: forwards to the three-argument
 * {@code printDocumentTopics} with the fixed arguments {@code 0.0} and {@code -1}.
 *
 * @param pw the writer that receives the output
 */
public void printDocumentTopics (PrintWriter pw)
{
	// NOTE(review): presumably a proportion threshold and a maximum count,
	// with these values meaning "no filtering" -- confirm against the
	// three-argument overload.
	final double threshold = 0.0;
	final int max = -1;
	printDocumentTopics (pw, threshold, max);
}
/**
 * Writes the model state to the given file by delegating to
 * {@code printState(PrintWriter)}.
 *
 * @param f the file to write to
 * @throws IOException if the file cannot be opened for writing
 */
public void printState (File f) throws IOException
{
	PrintWriter writer = new PrintWriter (new FileWriter(f));
	try {
		printState (writer);
	} finally {
		// Always release the file handle, even if printing throws.
		writer.close();
	}
}
// Delegate to the range-based estimate overload, starting at document index 0 with a range length of numDocs.
this.estimate(0, numDocs, numIterations, showTopicsInterval, outputModelInterval, outputModelFilename, r);
public static void main (String[] args) throws IOException { InstanceList ilist = InstanceList.load (new File(args[0])); int numIterations = args.length > 1 ? Integer.parseInt(args[1]) : 1000; int numTopWords = args.length > 2 ? Integer.parseInt(args[2]) : 20; System.out.println ("Data loaded."); LDA lda = new LDA (10); lda.estimate (ilist, numIterations, 50, 0, null, new Randoms()); // should be 1100 lda.printTopWords (numTopWords, true); lda.printDocumentTopics (new File(args[0]+".lda")); }
public void sampleTopicsForAllDocs (Randoms r) { double[] topicWeights = new double[numTopics]; // Loop over every word in the corpus for (int di = 0; di < topics.length; di++) { sampleTopicsForOneDoc ((FeatureSequence)ilist.get(di).getData(), topics[di], docTopicCounts[di], topicWeights, r); } }
/**
 * Writes the document topics to the given file by delegating to
 * {@code printDocumentTopics(PrintWriter)}.
 *
 * @param f the file to write to
 * @throws IOException if the file cannot be opened for writing
 */
public void printDocumentTopics (File f) throws IOException
{
	PrintWriter pw = new PrintWriter (new FileWriter (f));
	try {
		printDocumentTopics (pw);
	} finally {
		// PrintWriter buffers its output; without close() the data may never
		// reach the file and the handle leaks. A second close() is harmless.
		pw.close();
	}
}
/**
 * Writes the model state to the given file by delegating to
 * {@code printState(PrintWriter)}.
 *
 * @param f the file to write to
 * @throws IOException if the file cannot be opened for writing
 */
public void printState (File f) throws IOException
{
	PrintWriter writer = new PrintWriter (new FileWriter(f));
	try {
		printState (writer);
	} finally {
		// Always release the file handle, even if printing throws.
		writer.close();
	}
}