public static void main(String... args) throws Exception { String programName = Train.class.getName(); String usage = String.format("usage: java %s DIR\n\n" + "The directory DIR should contain the training-data.xxx file as\n" + "created by a classifier DataWriter\n", programName); // usage message for wrong number of arguments if (args.length < 1) { System.err.format("error: wrong number of arguments\n%s", usage); System.exit(1); } // parse out the training directory from the arguments File dir = new File(args[0]); String[] remainingArgs = new String[args.length - 1]; System.arraycopy(args, 1, remainingArgs, 0, remainingArgs.length); // train and package the classifier Train.main(dir, remainingArgs); }
@Override
public void trainClassifier(File dir, String... args) throws Exception {
  // Prepend the delegated model directory's path to the argument list,
  // then hand the combined arguments to the ClearTK trainer.
  String[] trainArgs = new String[args.length + 1];
  trainArgs[0] = this.getDelegatedModelDirectory(dir).getPath();
  System.arraycopy(args, 0, trainArgs, 1, args.length);
  Train.main(trainArgs);
}
public void train(File directory, Model.Params params) throws Exception {
  // Resolve the model directory for these parameters, then run training
  // with the parameter set's configured training arguments.
  File modelDir = this.getModelDirectory(directory, params);
  Train.main(modelDir, params.trainingArguments);
}
@Override public void train(CollectionReader collectionReader, File outputDirectory) throws Exception { // assemble the training pipeline AggregateBuilder aggregate = new AggregateBuilder(); // an annotator that loads the text from the training file URIs aggregate.add(UriToDocumentTextAnnotator.getDescription()); // an annotator that parses and loads MASC named entity annotations (and tokens) aggregate.add(MascGoldAnnotator.getDescription()); // an annotator that adds part-of-speech tags aggregate.add(PosTaggerAnnotator.getDescription()); // our NamedEntityChunker annotator, configured to write Mallet CRF training data aggregate.add(AnalysisEngineFactory.createEngineDescription( NamedEntityChunker.class, CleartkSequenceAnnotator.PARAM_IS_TRAINING, true, DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY, outputDirectory, DefaultSequenceDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME, MalletCrfStringOutcomeDataWriter.class)); // run the pipeline over the training corpus SimplePipeline.runPipeline(collectionReader, aggregate.createAggregateDescription()); // quiet Mallet down a bit (but still leave likelihoods so you can see progress) Logger malletLogger = Logger.getLogger("cc.mallet"); malletLogger.setLevel(Level.WARNING); Logger likelihoodLogger = Logger.getLogger("cc.mallet.fst.CRFOptimizableByLabelLikelihood"); likelihoodLogger.setLevel(Level.INFO); // train a Mallet CRF model on the training data Train.main(outputDirectory); }
// Train the model from the data previously written to outputDirectory.
// NOTE(review): enclosing method is not visible here — confirm outputDirectory
// already contains the training data this trainer expects.
Train.main(outputDirectory);
// Run ClearTK's jar-packaging trainer, log completion, then restore the
// console output that `hider` suppressed during training.
// NOTE(review): enclosing method not visible — `args`, `logger` and `hider`
// are defined outside this fragment.
org.cleartk.ml.jar.Train.main(args); logger.info("finished training."); hider.restoreOutput();
// Run ClearTK's jar-packaging trainer, log completion, then restore the
// console output that `hider` suppressed during training.
// NOTE(review): enclosing method not visible — `args`, `logger` and `hider`
// are defined outside this fragment.
org.cleartk.ml.jar.Train.main(args); logger.info("finished training."); hider.restoreOutput();
public static void main(String[] args) throws Exception { Options options = CliFactory.parseArguments(Options.class, args); // a reader that loads the URIs of the training files CollectionReaderDescription reader = UriCollectionReader.getDescriptionFromDirectory( options.getTrainDirectory(), MascTextFileFilter.class, null); // assemble the training pipeline AggregateBuilder aggregate = new AggregateBuilder(); // an annotator that loads the text from the training file URIs aggregate.add(UriToDocumentTextAnnotator.getDescription()); // an annotator that parses and loads MASC named entity annotations (and tokens) aggregate.add(MascGoldAnnotator.getDescription()); // an annotator that adds part-of-speech tags (so we can use them for features) aggregate.add(PosTaggerAnnotator.getDescription()); // our NamedEntityChunker annotator, configured to write Mallet CRF training data aggregate.add(AnalysisEngineFactory.createEngineDescription( NamedEntityChunker.class, CleartkSequenceAnnotator.PARAM_IS_TRAINING, true, DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY, options.getModelDirectory(), DefaultSequenceDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME, MalletCrfStringOutcomeDataWriter.class)); // run the pipeline over the training corpus SimplePipeline.runPipeline(reader, aggregate.createAggregateDescription()); // train a Mallet CRF model on the training data Train.main(options.getModelDirectory()); }