/**
 * Command-line entry point: loads an instance list, estimates a topic model on it and
 * writes the model state to {@code state.gz} via {@code printState}.
 *
 * <p>Arguments:
 * <ol>
 *   <li>{@code args[0]} - file handed to {@code InstanceList.load} (required)</li>
 *   <li>{@code args[1]} - number of topics (optional, defaults to 200)</li>
 *   <li>{@code args[2]} - number of threads (optional; when omitted the model's own
 *       thread setting is left untouched)</li>
 * </ol>
 *
 * <p>Any exception raised while loading, estimating or writing is caught and its stack
 * trace is printed.
 *
 * @param args command-line arguments as described above
 */
public static void main (String[] args) {
    if (args.length == 0) {
        // Without an input file there is nothing to do; say so instead of failing on args[0].
        System.err.println("Usage: <instance-list-file> [numTopics] [numThreads]");
        return;
    }

    try {
        InstanceList training = InstanceList.load (new File(args[0]));

        int numTopics = args.length > 1 ? Integer.parseInt(args[1]) : 200;

        ParallelTopicModel lda = new ParallelTopicModel (numTopics, 50.0, 0.01);
        lda.printLogLikelihood = true;
        lda.setTopicDisplay(50, 7);
        lda.addInstances(training);

        // The thread count is optional, just like the topic count. Reading args[2]
        // unconditionally made the numTopics default unreachable, because any call
        // with fewer than three arguments failed with ArrayIndexOutOfBoundsException.
        if (args.length > 2) {
            lda.setNumThreads(Integer.parseInt(args[2]));
        }

        lda.estimate();

        logger.info("printing state");
        lda.printState(new File("state.gz"));
        logger.info("finished printing");
    } catch (Exception e) {
        e.printStackTrace();
    }
}
/**
 * Command-line entry point: loads an instance list, estimates a topic model on it and
 * writes the model state to {@code state.gz} via {@code printState}.
 *
 * <p>Arguments:
 * <ol>
 *   <li>{@code args[0]} - file handed to {@code InstanceList.load} (required)</li>
 *   <li>{@code args[1]} - number of topics (optional, defaults to 200)</li>
 *   <li>{@code args[2]} - number of threads (optional; when omitted the model's own
 *       thread setting is left untouched)</li>
 * </ol>
 *
 * <p>Any exception raised while loading, estimating or writing is caught and its stack
 * trace is printed.
 *
 * @param args command-line arguments as described above
 */
public static void main (String[] args) {
    if (args.length == 0) {
        // Without an input file there is nothing to do; say so instead of failing on args[0].
        System.err.println("Usage: <instance-list-file> [numTopics] [numThreads]");
        return;
    }

    try {
        InstanceList training = InstanceList.load (new File(args[0]));

        int numTopics = args.length > 1 ? Integer.parseInt(args[1]) : 200;

        ParallelTopicModel lda = new ParallelTopicModel (numTopics, 50.0, 0.01);
        lda.printLogLikelihood = true;
        lda.setTopicDisplay(50, 7);
        lda.addInstances(training);

        // The thread count is optional, just like the topic count. Reading args[2]
        // unconditionally made the numTopics default unreachable, because any call
        // with fewer than three arguments failed with ArrayIndexOutOfBoundsException.
        if (args.length > 2) {
            lda.setNumThreads(Integer.parseInt(args[2]));
        }

        lda.estimate();

        logger.info("printing state");
        lda.printState(new File("state.gz"));
        logger.info("finished printing");
    } catch (Exception e) {
        e.printStackTrace();
    }
}
/**
 * Command-line entry point: loads an instance list, estimates a topic model on it and
 * writes the model state to {@code state.gz} via {@code printState}.
 *
 * <p>Arguments:
 * <ol>
 *   <li>{@code args[0]} - file handed to {@code InstanceList.load} (required)</li>
 *   <li>{@code args[1]} - number of topics (optional, defaults to 200)</li>
 *   <li>{@code args[2]} - number of threads (optional; when omitted the model's own
 *       thread setting is left untouched)</li>
 * </ol>
 *
 * <p>Any exception raised while loading, estimating or writing is caught and its stack
 * trace is printed.
 *
 * @param args command-line arguments as described above
 */
public static void main (String[] args) {
    if (args.length == 0) {
        // Without an input file there is nothing to do; say so instead of failing on args[0].
        System.err.println("Usage: <instance-list-file> [numTopics] [numThreads]");
        return;
    }

    try {
        InstanceList training = InstanceList.load (new File(args[0]));

        int numTopics = args.length > 1 ? Integer.parseInt(args[1]) : 200;

        ParallelTopicModel lda = new ParallelTopicModel (numTopics, 50.0, 0.01);
        lda.printLogLikelihood = true;
        lda.setTopicDisplay(50, 7);
        lda.addInstances(training);

        // The thread count is optional, just like the topic count. Reading args[2]
        // unconditionally made the numTopics default unreachable, because any call
        // with fewer than three arguments failed with ArrayIndexOutOfBoundsException.
        if (args.length > 2) {
            lda.setNumThreads(Integer.parseInt(args[2]));
        }

        lda.estimate();

        logger.info("printing state");
        lda.printState(new File("state.gz"));
        logger.info("finished printing");
    } catch (Exception e) {
        e.printStackTrace();
    }
}
/**
 * Command-line entry point: estimates a topic model and optionally writes its
 * diagnostics as XML.
 *
 * <p>Arguments:
 * <ol>
 *   <li>{@code args[0]} - file handed to {@code InstanceList.load}</li>
 *   <li>{@code args[1]} - number of topics</li>
 *   <li>{@code args[2]} - output file for {@code diagnostics.toXML()}; the XML is
 *       written only when exactly three arguments are supplied</li>
 * </ol>
 *
 * @param args command-line arguments as described above
 * @throws Exception if loading, estimation or writing the output fails
 */
public static void main (String[] args) throws Exception {
    InstanceList instances = InstanceList.load(new File(args[0]));
    int numTopics = Integer.parseInt(args[1]);

    ParallelTopicModel model = new ParallelTopicModel(numTopics, 5.0, 0.01);
    model.addInstances(instances);
    model.setNumIterations(1000);

    model.estimate();

    TopicModelDiagnostics diagnostics = new TopicModelDiagnostics(model, 20);

    if (args.length == 3) {
        // try-with-resources closes the writer even if toXML() or println() throws;
        // the previous explicit close() was skipped on that path.
        try (PrintWriter out = new PrintWriter(args[2])) {
            out.println(diagnostics.toXML());
        }
    }
}
}
/**
 * Command-line entry point: estimates a topic model and optionally writes its
 * diagnostics as XML.
 *
 * <p>Arguments:
 * <ol>
 *   <li>{@code args[0]} - file handed to {@code InstanceList.load}</li>
 *   <li>{@code args[1]} - number of topics</li>
 *   <li>{@code args[2]} - output file for {@code diagnostics.toXML()}; the XML is
 *       written only when exactly three arguments are supplied</li>
 * </ol>
 *
 * @param args command-line arguments as described above
 * @throws Exception if loading, estimation or writing the output fails
 */
public static void main (String[] args) throws Exception {
    InstanceList instances = InstanceList.load(new File(args[0]));
    int numTopics = Integer.parseInt(args[1]);

    ParallelTopicModel model = new ParallelTopicModel(numTopics, 5.0, 0.01);
    model.addInstances(instances);
    model.setNumIterations(1000);

    model.estimate();

    TopicModelDiagnostics diagnostics = new TopicModelDiagnostics(model, 20);

    if (args.length == 3) {
        // try-with-resources closes the writer even if toXML() or println() throws;
        // the previous explicit close() was skipped on that path.
        try (PrintWriter out = new PrintWriter(args[2])) {
            out.println(diagnostics.toXML());
        }
    }
}
}
/**
 * Command-line entry point: estimates a topic model and optionally writes its
 * diagnostics as XML.
 *
 * <p>Arguments:
 * <ol>
 *   <li>{@code args[0]} - file handed to {@code InstanceList.load}</li>
 *   <li>{@code args[1]} - number of topics</li>
 *   <li>{@code args[2]} - output file for {@code diagnostics.toXML()}; the XML is
 *       written only when exactly three arguments are supplied</li>
 * </ol>
 *
 * @param args command-line arguments as described above
 * @throws Exception if loading, estimation or writing the output fails
 */
public static void main (String[] args) throws Exception {
    InstanceList instances = InstanceList.load(new File(args[0]));
    int numTopics = Integer.parseInt(args[1]);

    ParallelTopicModel model = new ParallelTopicModel(numTopics, 5.0, 0.01);
    model.addInstances(instances);
    model.setNumIterations(1000);

    model.estimate();

    TopicModelDiagnostics diagnostics = new TopicModelDiagnostics(model, 20);

    if (args.length == 3) {
        // try-with-resources closes the writer even if toXML() or println() throws;
        // the previous explicit close() was skipped on that path.
        try (PrintWriter out = new PrintWriter(args[2])) {
            out.println(diagnostics.toXML());
        }
    }
}
}
/**
 * Loads the instance list named by {@code args[0]}, estimates a topic model built with
 * the fixed settings (50, 5.0, 0.01) for 100 iterations, and prints a summary through
 * {@code JSONTopicReports} to {@code summary.json}.
 *
 * @param args command-line arguments; {@code args[0]} is the instance list file
 * @throws Exception if loading, estimation or writing the summary fails
 */
public static void main (String[] args) throws Exception {
    final File inputFile = new File(args[0]);
    final InstanceList corpus = InstanceList.load(inputFile);

    final ParallelTopicModel lda = new ParallelTopicModel(50, 5.0, 0.01);
    lda.addInstances(corpus);
    lda.setNumIterations(100);
    lda.estimate();

    final TopicReports reporter = new JSONTopicReports(lda);
    final File summaryFile = new File("summary.json");
    reporter.printSummary(summaryFile, 20);
}
/**
 * Loads the instance list named by {@code args[0]}, estimates a topic model built with
 * the fixed settings (50, 5.0, 0.01) for 100 iterations, and prints a summary through
 * {@code JSONTopicReports} to {@code summary.json}.
 *
 * @param args command-line arguments; {@code args[0]} is the instance list file
 * @throws Exception if loading, estimation or writing the summary fails
 */
public static void main (String[] args) throws Exception {
    final File inputFile = new File(args[0]);
    final InstanceList corpus = InstanceList.load(inputFile);

    final ParallelTopicModel lda = new ParallelTopicModel(50, 5.0, 0.01);
    lda.addInstances(corpus);
    lda.setNumIterations(100);
    lda.estimate();

    final TopicReports reporter = new JSONTopicReports(lda);
    final File summaryFile = new File("summary.json");
    reporter.printSummary(summaryFile, 20);
}
// Run estimate() on topicModel; its construction and configuration are outside this excerpt.
topicModel.estimate();
// Run estimate() on model; its construction and configuration are outside this excerpt.
model.estimate();
/**
 * Builds and estimates a topic model from collaborative filtering preferences.
 *
 * <p>The model is constructed with {@code alpha * k} as its second argument, and its
 * thread count is set to the number of processors reported by the runtime. The topic
 * display interval is set to {@code numIterations + 1} with zero words, presumably so
 * that no intermediate topic summaries are printed.
 *
 * @param <U> user type
 * @param <I> item type
 * @param preferences preference data wrapped in an {@code LDAInstanceList}
 * @param k number of topics
 * @param alpha alpha in model (multiplied by {@code k} before being passed on)
 * @param beta beta in model
 * @param numIterations number of iterations
 * @param burninPeriod burn-in period
 * @return the estimated topic model
 * @throws IOException when internal IO error occurs
 */
public static <U, I> ParallelTopicModel estimate(FastPreferenceData<U, I> preferences, int k, double alpha, double beta, int numIterations, int burninPeriod) throws IOException {
    final double alphaSum = alpha * k;
    final ParallelTopicModel lda = new ParallelTopicModel(k, alphaSum, beta);

    lda.addInstances(new LDAInstanceList<>(preferences));

    final int displayInterval = numIterations + 1;
    lda.setTopicDisplay(displayInterval, 0);
    lda.setNumIterations(numIterations);
    lda.setBurninPeriod(burninPeriod);

    final int processors = Runtime.getRuntime().availableProcessors();
    lda.setNumThreads(processors);

    lda.estimate();
    return lda;
}
/**
 * Builds an instance list from the documents returned by {@code getDocumentsFromMongo()},
 * estimates a topic model on it, writes the model to {@code modelFile} and passes the
 * instances and model to {@code writeTopicAssignmentsToMongo}.
 *
 * @param settings job settings (not read by this method)
 * @throws AnalysisEngineProcessException if estimation fails or the model cannot be written
 */
@Override
protected void execute(JobSettings settings) throws AnalysisEngineProcessException {
    InstanceList instances = new InstanceList(new TopicModelPipe(stopwords));
    instances.addThruPipe(getDocumentsFromMongo());

    ParallelTopicModel model = new ParallelTopicModel(numTopics, 1.0, 0.01);
    model.setNumThreads(numThreads);
    model.setNumIterations(numIterations);
    model.addInstances(instances);

    try {
        model.estimate();
    } catch (IOException e) {
        getMonitor().warn("Couldn't estimate topic model");
        throw new AnalysisEngineProcessException(e);
    }

    File serializedModelFile = new File(modelFile);
    try {
        // A bare file name such as "model.bin" has no parent; passing the resulting
        // null to Files.createDirectories would throw NullPointerException, so the
        // directory is only created when there is one to create.
        File parentDir = serializedModelFile.getParentFile();
        if (parentDir != null) {
            Files.createDirectories(parentDir.toPath());
        }
        model.write(serializedModelFile);
        writeTopicAssignmentsToMongo(instances, new TopicWords(model), model);
    } catch (IOException e) {
        throw new AnalysisEngineProcessException("Error writing model", new Object[0], e);
    }
}
// Apply the optimize interval, log the start of training, run estimate(), then store
// the estimated model and the model id on `model`. Setup of both objects is outside this excerpt.
malletParallelModel.setOptimizeInterval(optimizationInterval); LOGGER.info("Start training"); malletParallelModel.estimate(); model.malletModel = malletParallelModel; model.modelId = modelId;
// Run estimate() on topicModel; its construction and configuration are outside this excerpt.
topicModel.estimate();
malletParallelModel.setNumIterations(numIterations); malletParallelModel.setOptimizeInterval(optimizationInterval); malletParallelModel.estimate(); model.malletModel = malletParallelModel; } catch (Exception e) {
/**
 * Builds an instance list from the documents returned by {@code getDocumentsFromMongo()},
 * estimates a topic model on it, writes the model to {@code modelFile} and passes the
 * instances and model to {@code writeTopicAssignmentsToMongo}.
 *
 * @param settings job settings (not read by this method)
 * @throws AnalysisEngineProcessException if estimation fails or the model cannot be written
 */
@Override
protected void execute(JobSettings settings) throws AnalysisEngineProcessException {
    InstanceList instances = new InstanceList(new TopicModelPipe(stopwords));
    instances.addThruPipe(getDocumentsFromMongo());

    ParallelTopicModel model = new ParallelTopicModel(numTopics, 1.0, 0.01);
    model.setNumThreads(numThreads);
    model.setNumIterations(numIterations);
    model.addInstances(instances);

    try {
        model.estimate();
    } catch (IOException e) {
        getMonitor().warn("Couldn't estimate topic model");
        throw new AnalysisEngineProcessException(e);
    }

    File serializedModelFile = new File(modelFile);
    try {
        // A bare file name such as "model.bin" has no parent; passing the resulting
        // null to Files.createDirectories would throw NullPointerException, so the
        // directory is only created when there is one to create.
        File parentDir = serializedModelFile.getParentFile();
        if (parentDir != null) {
            Files.createDirectories(parentDir.toPath());
        }
        model.write(serializedModelFile);
        writeTopicAssignmentsToMongo(instances, new TopicWords(model), model);
    } catch (IOException e) {
        throw new AnalysisEngineProcessException("Error writing model", new Object[0], e);
    }
}
/**
 * Estimates a topic model over the collected instances and writes it to the target
 * location.
 *
 * <p>The model is configured from this component's fields and getters (topic count,
 * alpha sum, beta, thread count, iterations, burn-in, optimize interval, random seed,
 * periodic serialization, symmetric alpha and topic display) before estimation. The
 * target file's parent directory is created when the path has one.
 *
 * @throws AnalysisEngineProcessException wrapping any {@code IOException} or
 *         {@code SecurityException} raised during estimation or writing
 */
@Override
public void collectionProcessComplete() throws AnalysisEngineProcessException {
    try {
        final ParallelTopicModel lda = new ParallelTopicModel(nTopics, alphaSum, beta);
        lda.addInstances(getInstanceList());
        lda.setNumThreads(getNumThreads());
        lda.setNumIterations(nIterations);
        lda.setBurninPeriod(burninPeriod);
        lda.setOptimizeInterval(optimizeInterval);
        lda.setRandomSeed(randomSeed);
        lda.setSaveSerializedModel(saveInterval, getTargetLocation());
        lda.setSymmetricAlpha(useSymmetricAlpha);
        lda.setTopicDisplay(displayInterval, displayNTopicWords);

        lda.estimate();

        getLogger().info("Writing model to " + getTargetLocation());

        final File destination = new File(getTargetLocation());
        final File parentDir = destination.getParentFile();
        if (parentDir != null) {
            parentDir.mkdirs();
        }
        lda.write(destination);
    } catch (IOException | SecurityException e) {
        throw new AnalysisEngineProcessException(e);
    }
}
// Set the topic display to (0, 0) and the iteration count to 2000, then run estimate().
// NOTE(review): (0, 0) presumably turns off periodic topic output - confirm against ParallelTopicModel.
model.setTopicDisplay(0,0); model.setNumIterations(2000); model.estimate();