/**
 * Categorizes the tokens found in the given CAS.
 *
 * The covered text of every annotation in the token type's annotation index
 * is collected, handed to the categorizer, and the category returned by
 * {@code getBestCategory} is passed on to {@code setBestCategory}.
 *
 * @param cas the CAS whose token annotations are categorized
 */
public void process(CAS cas) {
  List<String> tokenTexts = new ArrayList<>();

  // Walk the token annotations in index order and keep only their covered text.
  for (FSIterator<AnnotationFS> it = cas.getAnnotationIndex(mTokenType).iterator(); it.hasNext();) {
    AnnotationFS token = it.next();
    tokenTexts.add(token.getCoveredText());
  }

  String[] tokens = tokenTexts.toArray(new String[tokenTexts.size()]);
  double[] outcomes = mCategorizer.categorize(tokens);

  setBestCategory(cas, mCategorizer.getBestCategory(outcomes));
}
} // closes the enclosing type, whose header lies outside this excerpt
@Test public void testSimpleTraining() throws IOException { ObjectStream<DocumentSample> samples = ObjectStreamUtils.createObjectStream( new DocumentSample("1", new String[]{"a", "b", "c"}), new DocumentSample("1", new String[]{"a", "b", "c", "1", "2"}), new DocumentSample("1", new String[]{"a", "b", "c", "3", "4"}), new DocumentSample("0", new String[]{"x", "y", "z"}), new DocumentSample("0", new String[]{"x", "y", "z", "5", "6"}), new DocumentSample("0", new String[]{"x", "y", "z", "7", "8"})); TrainingParameters params = new TrainingParameters(); params.put(TrainingParameters.ITERATIONS_PARAM, 100); params.put(TrainingParameters.CUTOFF_PARAM, 0); DoccatModel model = DocumentCategorizerME.train("x-unspecified", samples, params, new DoccatFactory()); DocumentCategorizer doccat = new DocumentCategorizerME(model); double[] aProbs = doccat.categorize(new String[]{"a"}); Assert.assertEquals("1", doccat.getBestCategory(aProbs)); double[] bProbs = doccat.categorize(new String[]{"x"}); Assert.assertEquals("0", doccat.getBestCategory(bProbs)); //test to make sure sorted map's last key is cat 1 because it has the highest score. SortedMap<Double, Set<String>> sortedScoreMap = doccat.sortedScoreMap(new String[]{"a"}); Set<String> cat = sortedScoreMap.get(sortedScoreMap.lastKey()); Assert.assertEquals(1, cat.size()); }
// Split the document text into tokens and let the categorizer score them.
String[] tokens = tokenizer.tokenize(docText);
double[] probs = categorizer.categorize(tokens); //<co id="tmt.categorize"/>

// Ask the categorizer for its best category, then use that category's index
// to read the matching entry out of the outcome array.
String label = categorizer.getBestCategory(probs);
int bestIndex = categorizer.getIndex(label);
double score = probs[bestIndex];
Tokenizer tokenizer = SimpleTokenizer.INSTANCE; int catCount = categorizer.getNumberOfCategories(); Collection<String> categories = new ArrayList<String>(catCount); for (int i=0; i < catCount; i++) { categories.add(categorizer.getCategory(i));
/**
 * Categorizes the text of the given reference {@link DocumentSample} and
 * records whether the detected category agrees with the sample's own category.
 *
 * A match adds {@code 1} to the accuracy statistic, a mismatch adds {@code 0}.
 *
 * @param sample the reference {@link DocumentSample}
 * @return a new {@link DocumentSample} pairing the detected category with the
 *         text of the reference sample
 */
public DocumentSample processSample(DocumentSample sample) {
  double[] outcomes = categorizer.categorize(sample.getText());
  String predicted = categorizer.getBestCategory(outcomes);

  boolean matches = sample.getCategory().equals(predicted);
  accuracy.add(matches ? 1 : 0);

  return new DocumentSample(predicted, sample.getText());
}
// Categorize the single token "a"; its best category is expected to be "1".
double[] aProbs = doccat.categorize(new String[]{"a"});
Assert.assertEquals("1", doccat.getBestCategory(aProbs));

// Same check for the single token "x", which is expected to land in category "0".
double[] bProbs = doccat.categorize(new String[]{"x"});
Assert.assertEquals("0", doccat.getBestCategory(bProbs));

// Fetch the set stored under the map's highest key and verify that exactly
// one category is mapped to it.
// NOTE(review): the Double keys are presumably category scores -- confirm
// against the sortedScoreMap contract.
SortedMap<Double, Set<String>> sortedScoreMap = doccat.sortedScoreMap(new String[]{"a"});
Set<String> cat = sortedScoreMap.get(sortedScoreMap.lastKey());
Assert.assertEquals(1, cat.size());
/**
 * Categorizes the tokens found in the given CAS.
 *
 * The covered text of every annotation in the token type's annotation index
 * is collected, handed to the categorizer, and the category returned by
 * {@code getBestCategory} is passed on to {@code setBestCategory}.
 *
 * @param cas the CAS whose token annotations are categorized
 */
public void process(CAS cas) {
  List<String> tokenTexts = new ArrayList<>();

  // Walk the token annotations in index order and keep only their covered text.
  for (FSIterator<AnnotationFS> it = cas.getAnnotationIndex(mTokenType).iterator(); it.hasNext();) {
    AnnotationFS token = it.next();
    tokenTexts.add(token.getCoveredText());
  }

  String[] tokens = tokenTexts.toArray(new String[tokenTexts.size()]);
  double[] outcomes = mCategorizer.categorize(tokens);

  setBestCategory(cas, mCategorizer.getBestCategory(outcomes));
}
} // closes the enclosing type, whose header lies outside this excerpt
/**
 * Categorizes the text of the given reference {@link DocumentSample} and
 * records whether the detected category agrees with the sample's own category.
 *
 * A match adds {@code 1} to the accuracy statistic, a mismatch adds {@code 0}.
 *
 * @param sample the reference {@link DocumentSample}
 * @return a new {@link DocumentSample} pairing the detected category with the
 *         text of the reference sample
 */
public DocumentSample processSample(DocumentSample sample) {
  double[] outcomes = categorizer.categorize(sample.getText());
  String predicted = categorizer.getBestCategory(outcomes);

  boolean matches = sample.getCategory().equals(predicted);
  accuracy.add(matches ? 1 : 0);

  return new DocumentSample(predicted, sample.getText());
}
/**
 * Categorizes the text of the given reference {@link DocumentSample} and
 * records whether the detected category agrees with the sample's own category.
 *
 * A match adds {@code 1} to the accuracy statistic, a mismatch adds {@code 0}.
 *
 * @param sample the reference {@link DocumentSample}
 * @return a new {@link DocumentSample} pairing the detected category with the
 *         text of the reference sample
 */
public DocumentSample processSample(DocumentSample sample) {
  double[] outcomes = categorizer.categorize(sample.getText());
  String predicted = categorizer.getBestCategory(outcomes);

  boolean matches = sample.getCategory().equals(predicted);
  accuracy.add(matches ? 1 : 0);

  return new DocumentSample(predicted, sample.getText());
}