@Override
protected TempEval2010Evaluation getEvaluation(File trainDir, File testDir, File outputDir)
    throws Exception {
  // Single model that identifies time expression extents (null = no attribute suffix).
  List<ModelInfo<Time>> timeModels = new ArrayList<ModelInfo<Time>>();
  timeModels.add(new TimeModelInfo(null, TimeAnnotator.FACTORY));
  return new TempEval2010Evaluation(
      trainDir,
      testDir,
      outputDir,
      // Gold input: only the raw document text view is required.
      Arrays.asList(TempEval2010GoldAnnotator.PARAM_TEXT_VIEWS),
      TempEval2010GoldAnnotator.PARAM_TIME_EXTENT_VIEWS,
      TempEval2010Writer.PARAM_TIME_EXTENT_VIEW,
      // Preprocessing: stemming followed by part-of-speech tagging.
      Arrays.asList(
          DefaultSnowballStemmer.getDescription("English"),
          PosTaggerAnnotator.getDescription()),
      timeModels);
}
}
@Override
protected TempEval2010Evaluation getEvaluation(File trainDir, File testDir, File outputDir)
    throws Exception {
  // Single model that classifies the "timeType" attribute of time expressions.
  List<ModelInfo<Time>> attributeModels = new ArrayList<ModelInfo<Time>>();
  attributeModels.add(new TimeModelInfo("timeType", TimeTypeAnnotator.FACTORY));
  return new TempEval2010Evaluation(
      trainDir,
      testDir,
      outputDir,
      // Gold inputs: document text plus the time extents to be classified.
      Arrays.asList(
          TempEval2010GoldAnnotator.PARAM_TEXT_VIEWS,
          TempEval2010GoldAnnotator.PARAM_TIME_EXTENT_VIEWS),
      TempEval2010GoldAnnotator.PARAM_TIME_ATTRIBUTE_VIEWS,
      TempEval2010Writer.PARAM_TIME_ATTRIBUTE_VIEW,
      // Preprocessing: stemming followed by part-of-speech tagging.
      Arrays.asList(
          DefaultSnowballStemmer.getDescription("English"),
          PosTaggerAnnotator.getDescription()),
      attributeModels);
}
}
@Override
protected TempEval2010Evaluation getEvaluation(File trainDir, File testDir, File outputDir)
    throws Exception {
  // Single model that identifies event extents, trained with the MaxEnt classifier.
  List<ModelInfo<Event>> eventModels = new ArrayList<ModelInfo<Event>>();
  eventModels.add(new EventModelInfo(null, EventAnnotator.FACTORY, new String[] { "MaxEnt" }));
  return new TempEval2010Evaluation(
      trainDir,
      testDir,
      outputDir,
      // Gold input: only the raw document text view is required.
      Arrays.asList(TempEval2010GoldAnnotator.PARAM_TEXT_VIEWS),
      TempEval2010GoldAnnotator.PARAM_EVENT_EXTENT_VIEWS,
      TempEval2010Writer.PARAM_EVENT_EXTENT_VIEW,
      // Preprocessing: stemming, part-of-speech tagging and syntactic parsing.
      Arrays.asList(
          DefaultSnowballStemmer.getDescription("English"),
          PosTaggerAnnotator.getDescription(),
          ParserAnnotator.getDescription()),
      eventModels);
}
}
@Override
protected TempEval2010Evaluation getEvaluation(File trainDir, File testDir, File outputDir)
    throws Exception {
  // Single model for event-to-subordinated-event temporal links.
  List<ModelInfo<TemporalLink>> linkModels = new ArrayList<ModelInfo<TemporalLink>>();
  linkModels.add(new TemporalLinkModelInfo(TemporalLinkEventToSubordinatedEventAnnotator.FACTORY));
  return new TempEval2010Evaluation(
      trainDir,
      testDir,
      outputDir,
      // Gold inputs: text plus time/event extents and attributes.
      Arrays.asList(
          TempEval2010GoldAnnotator.PARAM_TEXT_VIEWS,
          TempEval2010GoldAnnotator.PARAM_TIME_EXTENT_VIEWS,
          TempEval2010GoldAnnotator.PARAM_TIME_ATTRIBUTE_VIEWS,
          TempEval2010GoldAnnotator.PARAM_EVENT_EXTENT_VIEWS,
          TempEval2010GoldAnnotator.PARAM_EVENT_ATTRIBUTE_VIEWS),
      TempEval2010GoldAnnotator.PARAM_TEMPORAL_LINK_EVENT_TO_SUBORDINATED_EVENT_VIEWS,
      TempEval2010Writer.PARAM_TEMPORAL_LINK_EVENT_TO_SUBORDINATED_EVENT_VIEW,
      // Preprocessing: stemming, part-of-speech tagging and syntactic parsing.
      Arrays.asList(
          DefaultSnowballStemmer.getDescription("English"),
          PosTaggerAnnotator.getDescription(),
          ParserAnnotator.getDescription()),
      linkModels);
}
}
@Override
protected TempEval2010Evaluation getEvaluation(File trainDir, File testDir, File outputDir)
    throws Exception {
  // Single model for event-to-same-sentence-time temporal links.
  List<ModelInfo<TemporalLink>> linkModels = new ArrayList<ModelInfo<TemporalLink>>();
  linkModels.add(new TemporalLinkModelInfo(TemporalLinkEventToSameSentenceTimeAnnotator.FACTORY));
  return new TempEval2010Evaluation(
      trainDir,
      testDir,
      outputDir,
      // Gold inputs: text plus time/event extents and attributes.
      Arrays.asList(
          TempEval2010GoldAnnotator.PARAM_TEXT_VIEWS,
          TempEval2010GoldAnnotator.PARAM_TIME_EXTENT_VIEWS,
          TempEval2010GoldAnnotator.PARAM_TIME_ATTRIBUTE_VIEWS,
          TempEval2010GoldAnnotator.PARAM_EVENT_EXTENT_VIEWS,
          TempEval2010GoldAnnotator.PARAM_EVENT_ATTRIBUTE_VIEWS),
      TempEval2010GoldAnnotator.PARAM_TEMPORAL_LINK_EVENT_TO_SAME_SENTENCE_TIME_VIEWS,
      TempEval2010Writer.PARAM_TEMPORAL_LINK_EVENT_TO_SAME_SENTENCE_TIME_VIEW,
      // Preprocessing: stemming, part-of-speech tagging and syntactic parsing.
      Arrays.asList(
          DefaultSnowballStemmer.getDescription("English"),
          PosTaggerAnnotator.getDescription(),
          ParserAnnotator.getDescription()),
      linkModels);
}
}
@Override
protected TempEval2010Evaluation getEvaluation(File trainDir, File testDir, File outputDir)
    throws Exception {
  // Single model for event-to-document-creation-time temporal links.
  List<ModelInfo<TemporalLink>> linkModels = new ArrayList<ModelInfo<TemporalLink>>();
  linkModels.add(
      new TemporalLinkModelInfo(TemporalLinkEventToDocumentCreationTimeAnnotator.FACTORY));
  return new TempEval2010Evaluation(
      trainDir,
      testDir,
      outputDir,
      // Gold inputs: text, document creation times, and time/event extents and attributes.
      Arrays.asList(
          TempEval2010GoldAnnotator.PARAM_TEXT_VIEWS,
          TempEval2010GoldAnnotator.PARAM_DOCUMENT_CREATION_TIME_VIEWS,
          TempEval2010GoldAnnotator.PARAM_TIME_EXTENT_VIEWS,
          TempEval2010GoldAnnotator.PARAM_TIME_ATTRIBUTE_VIEWS,
          TempEval2010GoldAnnotator.PARAM_EVENT_EXTENT_VIEWS,
          TempEval2010GoldAnnotator.PARAM_EVENT_ATTRIBUTE_VIEWS),
      TempEval2010GoldAnnotator.PARAM_TEMPORAL_LINK_EVENT_TO_DOCUMENT_CREATION_TIME_VIEWS,
      TempEval2010Writer.PARAM_TEMPORAL_LINK_EVENT_TO_DOCUMENT_CREATION_TIME_VIEW,
      // Preprocessing: stemming, part-of-speech tagging and syntactic parsing.
      Arrays.asList(
          DefaultSnowballStemmer.getDescription("English"),
          PosTaggerAnnotator.getDescription(),
          ParserAnnotator.getDescription()),
      linkModels);
}
}
@Override
protected TempEval2010Evaluation getEvaluation(File trainDir, File testDir, File outputDir)
    throws Exception {
  // Single model for main-event-to-next-sentence-main-event temporal links.
  List<ModelInfo<TemporalLink>> linkModels = new ArrayList<ModelInfo<TemporalLink>>();
  linkModels.add(
      new TemporalLinkModelInfo(TemporalLinkMainEventToNextSentenceMainEventAnnotator.FACTORY));
  return new TempEval2010Evaluation(
      trainDir,
      testDir,
      outputDir,
      // Gold inputs: text plus time/event extents and attributes.
      Arrays.asList(
          TempEval2010GoldAnnotator.PARAM_TEXT_VIEWS,
          TempEval2010GoldAnnotator.PARAM_TIME_EXTENT_VIEWS,
          TempEval2010GoldAnnotator.PARAM_TIME_ATTRIBUTE_VIEWS,
          TempEval2010GoldAnnotator.PARAM_EVENT_EXTENT_VIEWS,
          TempEval2010GoldAnnotator.PARAM_EVENT_ATTRIBUTE_VIEWS),
      TempEval2010GoldAnnotator.PARAM_TEMPORAL_LINK_MAIN_EVENT_TO_NEXT_SENTENCE_MAIN_EVENT_VIEWS,
      TempEval2010Writer.PARAM_TEMPORAL_LINK_MAIN_EVENT_TO_NEXT_SENTENCE_MAIN_EVENT_VIEW,
      // Preprocessing: stemming, part-of-speech tagging and syntactic parsing.
      Arrays.asList(
          DefaultSnowballStemmer.getDescription("English"),
          PosTaggerAnnotator.getDescription(),
          ParserAnnotator.getDescription()),
      linkModels);
}
}
@Override
protected TempEval2010Evaluation getEvaluation(File trainDir, File testDir, File outputDir)
    throws Exception {
  // One classification model per event attribute.
  List<ModelInfo<Event>> attributeModels = new ArrayList<ModelInfo<Event>>();
  attributeModels.add(new EventModelInfo("aspect", EventAspectAnnotator.FACTORY));
  attributeModels.add(new EventModelInfo("eventClass", EventClassAnnotator.FACTORY));
  attributeModels.add(new EventModelInfo("modality", EventModalityAnnotator.FACTORY));
  attributeModels.add(new EventModelInfo("polarity", EventPolarityAnnotator.FACTORY));
  attributeModels.add(new EventModelInfo("tense", EventTenseAnnotator.FACTORY));
  return new TempEval2010Evaluation(
      trainDir,
      testDir,
      outputDir,
      // Gold inputs: document text plus the event extents to be classified.
      Arrays.asList(
          TempEval2010GoldAnnotator.PARAM_TEXT_VIEWS,
          TempEval2010GoldAnnotator.PARAM_EVENT_EXTENT_VIEWS),
      TempEval2010GoldAnnotator.PARAM_EVENT_ATTRIBUTE_VIEWS,
      TempEval2010Writer.PARAM_EVENT_ATTRIBUTE_VIEW,
      // Preprocessing: stemming followed by part-of-speech tagging.
      Arrays.asList(
          DefaultSnowballStemmer.getDescription("English"),
          PosTaggerAnnotator.getDescription()),
      attributeModels);
}
}
public static void main(String[] args) throws Exception {
  // Directory of documents to process, given as the first command-line argument.
  File inputDirectory = new File(args[0]);
  // Linear pipeline: read URIs, load text, then annotate sentences, tokens,
  // part-of-speech tags and constituency parses.
  SimplePipeline.runPipeline(
      UriCollectionReader.getCollectionReaderFromDirectory(inputDirectory),
      UriToDocumentTextAnnotator.getDescription(),
      SentenceAnnotator.getDescription(),
      TokenAnnotator.getDescription(),
      PosTaggerAnnotator.getDescription(),
      ParserAnnotator.getDescription());
}
}
SentenceAnnotator.getDescription(), TokenAnnotator.getDescription(), PosTaggerAnnotator.getDescription(), DefaultSnowballStemmer.getDescription("English"), ParserAnnotator.getDescription(),
public static void main(String[] args) throws Exception {
  Options options = CliFactory.parseArguments(Options.class, args);

  // Reader over the URIs of the files in the input directory.
  CollectionReader collectionReader =
      UriCollectionReader.getCollectionReaderFromDirectory(options.getInputDirectory());

  // Load the document text, then run standard NLP preprocessing:
  // sentence segmentation, tokenization, part-of-speech tagging.
  AnalysisEngineDescription textLoader = UriToDocumentTextAnnotator.getDescription();
  AnalysisEngineDescription sentenceSegmenter = SentenceAnnotator.getDescription();
  AnalysisEngineDescription wordTokenizer = TokenAnnotator.getDescription();
  AnalysisEngineDescription tagger = PosTaggerAnnotator.getDescription();

  // Write one line per Token annotation to the configured output file,
  // formatted by TokenAnnotationWriter.
  AnalysisEngineDescription writer = AnalysisEngineFactory.createEngineDescription(
      LineWriter.class,
      LineWriter.PARAM_OUTPUT_FILE_NAME,
      options.getOutputFile(),
      LineWriter.PARAM_OUTPUT_ANNOTATION_CLASS_NAME,
      Token.class.getName(),
      LineWriter.PARAM_ANNOTATION_WRITER_CLASS_NAME,
      TokenAnnotationWriter.class.getName());

  SimplePipeline.runPipeline(
      collectionReader, textLoader, sentenceSegmenter, wordTokenizer, tagger, writer);
  System.out.println("results written to " + options.getOutputFile());
}
@Override public void train(CollectionReader collectionReader, File outputDirectory) throws Exception { // assemble the training pipeline AggregateBuilder aggregate = new AggregateBuilder(); // an annotator that loads the text from the training file URIs aggregate.add(UriToDocumentTextAnnotator.getDescription()); // an annotator that parses and loads MASC named entity annotations (and tokens) aggregate.add(MascGoldAnnotator.getDescription()); // an annotator that adds part-of-speech tags aggregate.add(PosTaggerAnnotator.getDescription()); // our NamedEntityChunker annotator, configured to write Mallet CRF training data aggregate.add(AnalysisEngineFactory.createEngineDescription( NamedEntityChunker.class, CleartkSequenceAnnotator.PARAM_IS_TRAINING, true, DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY, outputDirectory, DefaultSequenceDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME, MalletCrfStringOutcomeDataWriter.class)); // run the pipeline over the training corpus SimplePipeline.runPipeline(collectionReader, aggregate.createAggregateDescription()); // quiet Mallet down a bit (but still leave likelihoods so you can see progress) Logger malletLogger = Logger.getLogger("cc.mallet"); malletLogger.setLevel(Level.WARNING); Logger likelihoodLogger = Logger.getLogger("cc.mallet.fst.CRFOptimizableByLabelLikelihood"); likelihoodLogger.setLevel(Level.INFO); // train a Mallet CRF model on the training data Train.main(outputDirectory); }
public AggregateBuilder buildTrainingAggregate() throws ResourceInitializationException { AggregateBuilder builder = new AggregateBuilder(); builder.add(UriToDocumentTextAnnotator.getDescription()); // NLP pre-processing components builder.add(SentenceAnnotator.getDescription()); builder.add(TokenAnnotator.getDescription()); builder.add(PosTaggerAnnotator.getDescription()); builder.add(DefaultSnowballStemmer.getDescription("English")); // This will extract the features for summarization builder.add(AnalysisEngineFactory.createEngineDescription( SumBasicAnnotator.class, DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME, SumBasicDataWriter.class.getName(), DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY, this.modelDirectory.getPath(), SumBasicAnnotator.PARAM_TOKEN_FIELD, this.tokenField.name(), SumBasicAnnotator.PARAM_STOPWORDS_URI, stopwordsFile.toURI())); // Save off xmis for re-reading builder.add(AnalysisEngineFactory.createEngineDescription( XmiWriter.class, XmiWriter.PARAM_OUTPUT_DIRECTORY, xmiDirectory.getPath())); return builder; }
SentenceAnnotator.getDescription(), TokenAnnotator.getDescription(), PosTaggerAnnotator.getDescription(), DefaultSnowballStemmer.getDescription("English"), ParserAnnotator.getDescription(),
public static void main(String[] args) throws Exception { Options options = CliFactory.parseArguments(Options.class, args); // a reader that loads the URIs of the text file CollectionReader reader = UriCollectionReader.getCollectionReaderFromFiles(Arrays.asList(options.getTextFile())); // assemble the classification pipeline AggregateBuilder aggregate = new AggregateBuilder(); // an annotator that loads the text from the training file URIs aggregate.add(UriToDocumentTextAnnotator.getDescription()); // annotators that identify sentences, tokens and part-of-speech tags in the text aggregate.add(SentenceAnnotator.getDescription()); aggregate.add(TokenAnnotator.getDescription()); aggregate.add(PosTaggerAnnotator.getDescription()); // our NamedEntityChunker annotator, configured to classify on the new texts aggregate.add(AnalysisEngineFactory.createEngineDescription( NamedEntityChunker.class, CleartkSequenceAnnotator.PARAM_IS_TRAINING, false, GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH, JarClassifierBuilder.getModelJarFile(options.getModelDirectory()))); // a very simple annotator that just prints out any named entities we found aggregate.add(AnalysisEngineFactory.createEngineDescription(PrintNamedEntityMentions.class)); // run the classification pipeline on the new texts SimplePipeline.runPipeline(reader, aggregate.createAggregateDescription()); }
aggregate.add(SentenceAnnotator.getDescription()); aggregate.add(TokenAnnotator.getDescription()); aggregate.add(PosTaggerAnnotator.getDescription()); aggregate.add(AnalysisEngineFactory.createEngineDescription( NamedEntityChunker.class,
public static void main(String[] args) throws Exception { Options options = CliFactory.parseArguments(Options.class, args); // a reader that loads the URIs of the training files CollectionReaderDescription reader = UriCollectionReader.getDescriptionFromDirectory( options.getTrainDirectory(), MascTextFileFilter.class, null); // assemble the training pipeline AggregateBuilder aggregate = new AggregateBuilder(); // an annotator that loads the text from the training file URIs aggregate.add(UriToDocumentTextAnnotator.getDescription()); // an annotator that parses and loads MASC named entity annotations (and tokens) aggregate.add(MascGoldAnnotator.getDescription()); // an annotator that adds part-of-speech tags (so we can use them for features) aggregate.add(PosTaggerAnnotator.getDescription()); // our NamedEntityChunker annotator, configured to write Mallet CRF training data aggregate.add(AnalysisEngineFactory.createEngineDescription( NamedEntityChunker.class, CleartkSequenceAnnotator.PARAM_IS_TRAINING, true, DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY, options.getModelDirectory(), DefaultSequenceDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME, MalletCrfStringOutcomeDataWriter.class)); // run the pipeline over the training corpus SimplePipeline.runPipeline(reader, aggregate.createAggregateDescription()); // train a Mallet CRF model on the training data Train.main(options.getModelDirectory()); }
new Class<?>[] { Text.class })); builder.add(TokenAnnotator.getDescription()); builder.add(PosTaggerAnnotator.getDescription()); builder.add(DefaultSnowballStemmer.getDescription("English")); builder.add(ParserAnnotator.getDescription());
new Class<?>[] { Text.class })); preprocess.add(TokenAnnotator.getDescription()); preprocess.add(PosTaggerAnnotator.getDescription()); preprocess.add(DefaultSnowballStemmer.getDescription("English")); preprocess.add(ParserAnnotator.getDescription());