/**
 * Prepares this annotator for use.
 *
 * <p>Delegates to the superclass initializer, keeps a reference to the given context,
 * emits an INFO log message when that level is enabled, and then builds the
 * {@link DocumentCategorizerME} from the model resource registered under
 * {@link UimaUtil#MODEL_PARAMETER}.
 *
 * @param context the UIMA context supplying the logger and the model resource
 * @throws ResourceInitializationException if the model resource cannot be accessed
 */
public void initialize(UimaContext context) throws ResourceInitializationException {
  super.initialize(context);
  this.context = context;

  final Logger logger = context.getLogger();
  if (logger.isLoggable(Level.INFO)) {
    logger.log(Level.INFO, "Initializing the OpenNLP Categorizer.");
  }

  final DoccatModel doccatModel;
  try {
    // The resource is looked up by the shared model parameter name and must be a
    // DoccatModelResource; the cast is performed before the model is read.
    final Object resource = context.getResourceObject(UimaUtil.MODEL_PARAMETER);
    final DoccatModelResource doccatResource = (DoccatModelResource) resource;
    doccatModel = doccatResource.getModel();
  } catch (ResourceAccessException e) {
    // Surface access failures as an initialization failure, keeping the cause.
    throw new ResourceInitializationException(e);
  }

  mCategorizer = new DocumentCategorizerME(doccatModel);
}
new DocumentCategorizerME(model), listeners.toArray(new DoccatEvaluationMonitor[listeners.size()]));
/**
 * Starts the cross-validation evaluation.
 *
 * <p>The samples are split by a {@link CrossValidationPartitioner} into {@code nFolds}
 * partitions. For every partition a model is trained on the training stream, evaluated
 * against the partition's test stream, and the resulting accuracy is added to
 * {@code documentAccuracy} together with the number of evaluated documents.
 *
 * @param samples the data to train and test
 * @param nFolds number of folds
 * @throws IOException declared for the sample stream, training and evaluation calls
 */
public void evaluate(ObjectStream<DocumentSample> samples, int nFolds) throws IOException {
  CrossValidationPartitioner<DocumentSample> folds =
      new CrossValidationPartitioner<>(samples, nFolds);

  while (folds.hasNext()) {
    CrossValidationPartitioner.TrainingSampleStream<DocumentSample> trainingFold = folds.next();

    // Train a fresh model on this fold's training portion.
    DoccatModel foldModel =
        DocumentCategorizerME.train(languageCode, trainingFold, params, factory);

    DocumentCategorizerME foldCategorizer = new DocumentCategorizerME(foldModel);
    DocumentCategorizerEvaluator foldEvaluator =
        new DocumentCategorizerEvaluator(foldCategorizer, listeners);

    // Score the model against the held-out portion of the same fold.
    foldEvaluator.evaluate(trainingFold.getTestSampleStream());

    documentAccuracy.add(foldEvaluator.getAccuracy(), foldEvaluator.getDocumentCount());
  }
}
DocumentCategorizerME documentCategorizerME = new DocumentCategorizerME(model);
@Test public void testSimpleTraining() throws IOException { ObjectStream<DocumentSample> samples = ObjectStreamUtils.createObjectStream( new DocumentSample("1", new String[]{"a", "b", "c"}), new DocumentSample("1", new String[]{"a", "b", "c", "1", "2"}), new DocumentSample("1", new String[]{"a", "b", "c", "3", "4"}), new DocumentSample("0", new String[]{"x", "y", "z"}), new DocumentSample("0", new String[]{"x", "y", "z", "5", "6"}), new DocumentSample("0", new String[]{"x", "y", "z", "7", "8"})); TrainingParameters params = new TrainingParameters(); params.put(TrainingParameters.ITERATIONS_PARAM, 100); params.put(TrainingParameters.CUTOFF_PARAM, 0); DoccatModel model = DocumentCategorizerME.train("x-unspecified", samples, params, new DoccatFactory()); DocumentCategorizer doccat = new DocumentCategorizerME(model); double[] aProbs = doccat.categorize(new String[]{"a"}); Assert.assertEquals("1", doccat.getBestCategory(aProbs)); double[] bProbs = doccat.categorize(new String[]{"x"}); Assert.assertEquals("0", doccat.getBestCategory(bProbs)); //test to make sure sorted map's last key is cat 1 because it has the highest score. SortedMap<Double, Set<String>> sortedScoreMap = doccat.sortedScoreMap(new String[]{"a"}); Set<String> cat = sortedScoreMap.get(sortedScoreMap.lastKey()); Assert.assertEquals(1, cat.size()); }
params, new DoccatFactory()); DocumentCategorizer doccat = new DocumentCategorizerME(model);
DocumentCategorizerME doccat = new DocumentCategorizerME(model);
DoccatModel model = new DoccatModel(modelStream); = new DocumentCategorizerME(model, nffg, bowfg); Tokenizer tokenizer = SimpleTokenizer.INSTANCE;
/**
 * Prepares this annotator for use.
 *
 * <p>Delegates to the superclass initializer, keeps a reference to the given context,
 * emits an INFO log message when that level is enabled, and then builds the
 * {@link DocumentCategorizerME} from the model resource registered under
 * {@link UimaUtil#MODEL_PARAMETER}.
 *
 * @param context the UIMA context supplying the logger and the model resource
 * @throws ResourceInitializationException if the model resource cannot be accessed
 */
public void initialize(UimaContext context) throws ResourceInitializationException {
  super.initialize(context);
  this.context = context;

  final Logger logger = context.getLogger();
  if (logger.isLoggable(Level.INFO)) {
    logger.log(Level.INFO, "Initializing the OpenNLP Categorizer.");
  }

  final DoccatModel doccatModel;
  try {
    // The resource is looked up by the shared model parameter name and must be a
    // DoccatModelResource; the cast is performed before the model is read.
    final Object resource = context.getResourceObject(UimaUtil.MODEL_PARAMETER);
    final DoccatModelResource doccatResource = (DoccatModelResource) resource;
    doccatModel = doccatResource.getModel();
  } catch (ResourceAccessException e) {
    // Surface access failures as an initialization failure, keeping the cause.
    throw new ResourceInitializationException(e);
  }

  mCategorizer = new DocumentCategorizerME(doccatModel);
}
new DocumentCategorizerME(model), listeners.toArray(new DoccatEvaluationMonitor[listeners.size()]));
new DocumentCategorizerME(model), listeners.toArray(new DoccatEvaluationMonitor[listeners.size()]));
DocumentCategorizerME categorizer = new DocumentCategorizerME(doccatModel); NameFinderME[] nameFinderMEs = new NameFinderME[tokenNameFinderModels.size()]; for (int i = 0; i < tokenNameFinderModels.size(); i++) {
/**
 * Starts the cross-validation evaluation.
 *
 * <p>The samples are split by a {@link CrossValidationPartitioner} into {@code nFolds}
 * partitions. For every partition a model is trained on the training stream, evaluated
 * against the partition's test stream, and the resulting accuracy is added to
 * {@code documentAccuracy} together with the number of evaluated documents.
 *
 * @param samples the data to train and test
 * @param nFolds number of folds
 * @throws IOException declared for the sample stream, training and evaluation calls
 */
public void evaluate(ObjectStream<DocumentSample> samples, int nFolds) throws IOException {
  CrossValidationPartitioner<DocumentSample> folds =
      new CrossValidationPartitioner<>(samples, nFolds);

  while (folds.hasNext()) {
    CrossValidationPartitioner.TrainingSampleStream<DocumentSample> trainingFold = folds.next();

    // Train a fresh model on this fold's training portion.
    DoccatModel foldModel =
        DocumentCategorizerME.train(languageCode, trainingFold, params, factory);

    DocumentCategorizerME foldCategorizer = new DocumentCategorizerME(foldModel);
    DocumentCategorizerEvaluator foldEvaluator =
        new DocumentCategorizerEvaluator(foldCategorizer, listeners);

    // Score the model against the held-out portion of the same fold.
    foldEvaluator.evaluate(trainingFold.getTestSampleStream());

    documentAccuracy.add(foldEvaluator.getAccuracy(), foldEvaluator.getDocumentCount());
  }
}
/**
 * Starts the cross-validation evaluation.
 *
 * <p>The samples are split by a {@link CrossValidationPartitioner} into {@code nFolds}
 * partitions. For every partition a model is trained on the training stream, evaluated
 * against the partition's test stream, and the resulting accuracy is added to
 * {@code documentAccuracy} together with the number of evaluated documents.
 *
 * @param samples the data to train and test
 * @param nFolds number of folds
 * @throws IOException declared for the sample stream, training and evaluation calls
 */
public void evaluate(ObjectStream<DocumentSample> samples, int nFolds) throws IOException {
  CrossValidationPartitioner<DocumentSample> folds =
      new CrossValidationPartitioner<>(samples, nFolds);

  while (folds.hasNext()) {
    CrossValidationPartitioner.TrainingSampleStream<DocumentSample> trainingFold = folds.next();

    // Train a fresh model on this fold's training portion.
    DoccatModel foldModel =
        DocumentCategorizerME.train(languageCode, trainingFold, params, factory);

    DocumentCategorizerME foldCategorizer = new DocumentCategorizerME(foldModel);
    DocumentCategorizerEvaluator foldEvaluator =
        new DocumentCategorizerEvaluator(foldCategorizer, listeners);

    // Score the model against the held-out portion of the same fold.
    foldEvaluator.evaluate(trainingFold.getTestSampleStream());

    documentAccuracy.add(foldEvaluator.getAccuracy(), foldEvaluator.getDocumentCount());
  }
}
/**
 * Categorizes every sentence in the CAS and records the result as a prediction annotation.
 *
 * <p>The model is read from the recommender context under {@code KEY_MODEL}. For each
 * sentence, the covered texts of its tokens are passed to the categorizer; a new
 * annotation of the prediction type spanning the sentence is then created, carrying the
 * best category in its {@code label} feature and the largest outcome value in its
 * {@code score} feature, and is added to the CAS indexes.
 *
 * @param aContext the recommender context expected to hold the trained model
 * @param aCas the CAS to read sentences and tokens from and to write predictions to
 * @throws RecommendationException if no model is stored under {@code KEY_MODEL}
 */
@Override
public void predict(RecommenderContext aContext, CAS aCas) throws RecommendationException {
  DoccatModel model = aContext.get(KEY_MODEL).orElseThrow(() ->
      new RecommendationException("Key [" + KEY_MODEL + "] not found in context"));

  DocumentCategorizerME categorizer = new DocumentCategorizerME(model);

  Type sentenceType = getType(aCas, Sentence.class);
  Type predictionType = getAnnotationType(aCas, PredictedSpan.class);
  Type tokenType = getType(aCas, Token.class);

  Feature confidenceFeature = predictionType.getFeatureByBaseName("score");
  Feature labelFeature = predictionType.getFeatureByBaseName("label");

  for (AnnotationFS sentence : select(aCas, sentenceType)) {
    // Collect the surface text of each token covered by the sentence, in order.
    List<AnnotationFS> sentenceTokens = selectCovered(tokenType, sentence);
    String[] tokenTexts = new String[sentenceTokens.size()];
    int position = 0;
    for (AnnotationFS token : sentenceTokens) {
      tokenTexts[position++] = token.getCoveredText();
    }

    double[] scores = categorizer.categorize(tokenTexts);
    String bestLabel = categorizer.getBestCategory(scores);

    // The prediction spans the whole sentence it was computed for.
    int begin = sentence.getBegin();
    int end = sentence.getEnd();
    AnnotationFS prediction = aCas.createAnnotation(predictionType, begin, end);
    prediction.setDoubleValue(confidenceFeature, NumberUtils.max(scores));
    prediction.setStringValue(labelFeature, bestLabel);
    aCas.addFsToIndexes(prediction);
  }
}
DocumentCategorizerME documentCategorizerME = new DocumentCategorizerME(model);
DocumentCategorizerME documentCategorizerME = new DocumentCategorizerME(model);