/** * Creates a new {@link BratNameSampleStream}. * @param sentModel a {@link SentenceModel} model * @param tokenModel a {@link TokenizerModel} model * @param samples a {@link BratDocument} {@link ObjectStream} */ public BratNameSampleStream(SentenceModel sentModel, TokenizerModel tokenModel, ObjectStream<BratDocument> samples) { super(samples); // TODO: We can pass in custom validators here ... this.parser = new BratDocumentParser(new SentenceDetectorME(sentModel), new TokenizerME(tokenModel), null); }
/**
 * Runs OpenNLP sentence detection on {@code text} and returns the sentence
 * character spans.
 *
 * @param text the text to segment
 * @return the detected sentence positions as {@link Span}s
 * @throws Exception if the bundled model cannot be loaded
 */
private Span[] testOpenNLPPosition(String text) throws Exception {
  // Load the packaged English sentence model; try-with-resources closes the stream.
  try (InputStream modelStream = this.getClass().getResourceAsStream(RESOURCES_EN_SENT_BIN)) {
    SentenceDetectorME detector = new SentenceDetectorME(new SentenceModel(modelStream));
    return detector.sentPosDetect(text);
  }
}
/**
 * Runs OpenNLP sentence detection on {@code text} and returns the sentence
 * strings.
 *
 * @param text the text to segment
 * @return the detected sentences, in document order
 * @throws Exception if the bundled model cannot be loaded
 */
private String[] testOpenNLP(String text) throws Exception {
  // Load the packaged English sentence model; try-with-resources closes the stream.
  try (InputStream modelStream = this.getClass().getResourceAsStream(RESOURCES_EN_SENT_BIN)) {
    SentenceDetectorME detector = new SentenceDetectorME(new SentenceModel(modelStream));
    return detector.sentDetect(text);
  }
}
// Train an English sentence model from the line-oriented sample stream.
SentenceModel sentdetectModel = SentenceDetectorME.train("eng",
    new SentenceSampleStream(new PlainTextByLineStream(in, StandardCharsets.UTF_8)),
    factory, mlParams);
SentenceDetectorME sentDetect = new SentenceDetectorME(sentdetectModel);

// NOTE: JUnit's Assert.assertEquals takes (expected, actual); the original
// had the arguments swapped, which produces misleading failure messages.
String[] sents = sentDetect.sentDetect(sampleSentences1);
Assert.assertEquals(2, sents.length);
Assert.assertEquals("This is a test.", sents[0]);
Assert.assertEquals("There are many tests, this is the second.", sents[1]);
double[] probs = sentDetect.getSentenceProbabilities();
Assert.assertEquals(2, probs.length);

sents = sentDetect.sentDetect(sampleSentences2);
Assert.assertEquals(2, sents.length);
probs = sentDetect.getSentenceProbabilities();
Assert.assertEquals(2, probs.length);
Assert.assertEquals("This is a test.", sents[0]);

sents = sentDetect.sentDetect(sampleSentences3);
Assert.assertEquals(2, sents.length);
probs = sentDetect.getSentenceProbabilities();
Assert.assertEquals(2, probs.length);
Assert.assertEquals("This is a \"test\".", sents[0]);

sents = sentDetect.sentDetect(sampleSentences4);
Assert.assertEquals(3, sents.length);
probs = sentDetect.getSentenceProbabilities();
Assert.assertEquals(3, probs.length);
/** * Starts the evaluation. * * @param samples * the data to train and test * @param nFolds * number of folds * * @throws IOException */ public void evaluate(ObjectStream<SentenceSample> samples, int nFolds) throws IOException { CrossValidationPartitioner<SentenceSample> partitioner = new CrossValidationPartitioner<>(samples, nFolds); while (partitioner.hasNext()) { CrossValidationPartitioner.TrainingSampleStream<SentenceSample> trainingSampleStream = partitioner.next(); SentenceModel model; model = SentenceDetectorME.train(languageCode, trainingSampleStream, sdFactory, params); // do testing SentenceDetectorEvaluator evaluator = new SentenceDetectorEvaluator( new SentenceDetectorME(model), listeners); evaluator.evaluate(trainingSampleStream.getTestSampleStream()); fmeasure.mergeInto(evaluator.getFMeasure()); } }
/** Delegates sentence boundary detection to the configured OpenNLP detector. */
@Override
protected Span[] detectSentences(String text) {
  Span[] sentenceSpans = sentenceDetector.sentPosDetect(text);
  return sentenceSpans;
}
/**
 * @deprecated Use
 *             {@link #train(String, ObjectStream, SentenceDetectorFactory, TrainingParameters)}
 *             and pass in a {@link SentenceDetectorFactory}.
 */
@Deprecated // annotation added to match the javadoc tag so the compiler warns callers
public static SentenceModel train(String languageCode,
    ObjectStream<SentenceSample> samples, boolean useTokenEnd,
    Dictionary abbreviations, TrainingParameters mlParams) throws IOException {
  // Wrap the legacy flags in a factory and delegate to the factory-based overload.
  SentenceDetectorFactory sdFactory = new SentenceDetectorFactory(
      languageCode, useTokenEnd, abbreviations, null);
  return train(languageCode, samples, sdFactory, mlParams);
}
// Initialize the sentence detector final SentenceDetectorME sdetector = EasyParserUtils .getOpenNLPSentDetector(Constants.SENTENCE_DETECTOR_DATA); // Initialize the parser final Parser parser = EasyParserUtils .getOpenNLPParser(Constants.PARSER_DATA_LOC); // Get sentences of the text final String sentences[] = sdetector.sentDetect(essay); // Go through the sentences and parse each for (final String sentence : sentences) { // Parse the sentence, produce only 1 parse final Parse[] parses = ParserTool.parseLine(sentence, parser, 10); if (parses.length == 0) { // Most probably this is code } else { // An English sentence } }
/** * Starts the evaluation. * * @param samples * the data to train and test * @param nFolds * number of folds * * @throws IOException */ public void evaluate(ObjectStream<SentenceSample> samples, int nFolds) throws IOException { CrossValidationPartitioner<SentenceSample> partitioner = new CrossValidationPartitioner<>(samples, nFolds); while (partitioner.hasNext()) { CrossValidationPartitioner.TrainingSampleStream<SentenceSample> trainingSampleStream = partitioner.next(); SentenceModel model; model = SentenceDetectorME.train(languageCode, trainingSampleStream, sdFactory, params); // do testing SentenceDetectorEvaluator evaluator = new SentenceDetectorEvaluator( new SentenceDetectorME(model), listeners); evaluator.evaluate(trainingSampleStream.getTestSampleStream()); fmeasure.mergeInto(evaluator.getFMeasure()); } }
/**
 * Detect sentences in a String.
 *
 * @param s The string to be processed.
 * @return A string array containing individual sentences as elements.
 */
public String[] sentDetect(String s) {
  // Reuse the span-based detector and materialize each covered region.
  Span[] spans = sentPosDetect(s);
  String[] sentences = new String[spans.length];
  for (int i = 0; i < spans.length; i++) {
    sentences[i] = spans[i].getCoveredText(s).toString();
  }
  // When no boundaries are found this is simply the empty array.
  return sentences;
}
/**
 * @deprecated Use
 *             {@link #train(String, ObjectStream, SentenceDetectorFactory, TrainingParameters)}
 *             and pass in a {@link SentenceDetectorFactory}.
 */
@Deprecated
public static SentenceModel train(String languageCode,
    ObjectStream<SentenceSample> samples, boolean useTokenEnd,
    Dictionary abbreviations) throws IOException {
  // Delegates to the five-argument overload with default training parameters.
  return train(languageCode, samples, useTokenEnd, abbreviations,
      ModelUtil.createDefaultTrainingParameters());
}
}
/**
 * Splits {@code text} into sentences using the configured OpenNLP detector.
 *
 * @param text the text to segment
 * @return a mutable list of the detected sentences, in document order
 */
@Override
public List<String> extractSentences(String text) {
  final String[] detected = sentenceDetector.sentDetect(text);
  return newArrayList(detected);
}
/** * Creates a new {@link BratNameSampleStream}. * @param sentModel a {@link SentenceModel} model * @param tokenModel a {@link TokenizerModel} model * @param samples a {@link BratDocument} {@link ObjectStream} * @param nameTypes the name types to use or null if all name types */ public BratNameSampleStream(SentenceModel sentModel, TokenizerModel tokenModel, ObjectStream<BratDocument> samples, Set<String> nameTypes) { super(samples); // TODO: We can pass in custom validators here ... this.parser = new BratDocumentParser(new SentenceDetectorME(sentModel), new TokenizerME(tokenModel), nameTypes); }
/**
 * Splits {@code sentenceSource} into {@link RawSentence}s carrying both the
 * sentence text and its character offsets in the source.
 *
 * @param sentenceSource the text to segment; null or empty yields an empty list
 * @return the detected sentences with their start/end offsets
 */
@Override
public List<RawSentence> tokenize( final String sentenceSource ) {
  // Guard: nothing to detect in a null/empty input.
  if ( Strings.isNullOrEmpty(sentenceSource) ) {
    return Collections.emptyList();
  }
  // NOTE(review): a detector is built per call; caching one per model may be
  // cheaper — confirm thread-safety requirements before changing.
  final SentenceDetectorME detector = new SentenceDetectorME(sentenceModel);
  final Span[] sentenceSpans = detector.sentPosDetect(sentenceSource);
  return Arrays.stream(sentenceSpans)
      .map(span -> {
        final String covered = span.getCoveredText(sentenceSource).toString();
        return new RawSentence(covered, span.getStart(), span.getEnd());
      })
      .collect(Collectors.toList());
}
}
// Sentence detector backed by the given maxent model.
SentenceDetectorME sdetector = new SentenceDetectorME(model);
// Read paragraphs until the stream is exhausted (read() returns null at EOF).
while ((para = paraStream.read()) != null) {
  // Split the paragraph into sentences and print each on its own line.
  String[] sents = sdetector.sentDetect(para);
  for (String sentence : sents) {
    System.out.println(sentence);
/** * Starts the evaluation. * * @param samples * the data to train and test * @param nFolds * number of folds * * @throws IOException */ public void evaluate(ObjectStream<SentenceSample> samples, int nFolds) throws IOException { CrossValidationPartitioner<SentenceSample> partitioner = new CrossValidationPartitioner<>(samples, nFolds); while (partitioner.hasNext()) { CrossValidationPartitioner.TrainingSampleStream<SentenceSample> trainingSampleStream = partitioner.next(); SentenceModel model; model = SentenceDetectorME.train(languageCode, trainingSampleStream, sdFactory, params); // do testing SentenceDetectorEvaluator evaluator = new SentenceDetectorEvaluator( new SentenceDetectorME(model), listeners); evaluator.evaluate(trainingSampleStream.getTestSampleStream()); fmeasure.mergeInto(evaluator.getFMeasure()); } }
/** Delegates sentence boundary detection to the configured OpenNLP detector. */
@Override
protected Span[] detectSentences(String text) {
  Span[] sentenceSpans = sentenceDetector.sentPosDetect(text);
  return sentenceSpans;
}
/**
 * Trains an English sentence model over the shared sample stream using the
 * supplied {@code factory} and default training parameters.
 */
private static SentenceModel train(SentenceDetectorFactory factory) throws IOException {
  TrainingParameters defaultParams = TrainingParameters.defaultParams();
  return SentenceDetectorME.train("eng", createSampleStream(), factory, defaultParams);
}
final String sentences[] = sdetector.sentDetect(essay);
/**
 * Initializes the current instance with the given context.
 * <p>
 * Note: Do all initialization in this method, do not use the constructor.
 */
public void initialize(UimaContext context) throws ResourceInitializationException {
  super.initialize(context);

  try {
    // Fetch the shared model resource registered under the model parameter key.
    SentenceModelResource modelResource = (SentenceModelResource) context
        .getResourceObject(UimaUtil.MODEL_PARAMETER);
    // Resource access failures are wrapped as initialization errors (cause kept).
    sentenceDetector = new SentenceDetectorME(modelResource.getModel());
  } catch (ResourceAccessException e) {
    throw new ResourceInitializationException(e);
  }
}