/** * Creates a new {@link BratNameSampleStream}. * @param sentModel a {@link SentenceModel} model * @param tokenModel a {@link TokenizerModel} model * @param samples a {@link BratDocument} {@link ObjectStream} */ public BratNameSampleStream(SentenceModel sentModel, TokenizerModel tokenModel, ObjectStream<BratDocument> samples) { super(samples); // TODO: We can pass in custom validators here ... this.parser = new BratDocumentParser(new SentenceDetectorME(sentModel), new TokenizerME(tokenModel), null); }
/**
 * Runs OpenNLP sentence detection on {@code text} and returns the sentence
 * character spans.
 *
 * @param text the text to segment
 * @return the detected sentence positions as {@link Span}s
 * @throws Exception if the bundled model cannot be loaded
 */
private Span[] testOpenNLPPosition(String text) throws Exception {
  // Load the packaged English sentence model; try-with-resources closes the stream.
  try (InputStream modelStream = this.getClass().getResourceAsStream(RESOURCES_EN_SENT_BIN)) {
    SentenceDetectorME detector = new SentenceDetectorME(new SentenceModel(modelStream));
    return detector.sentPosDetect(text);
  }
}
/**
 * Runs OpenNLP sentence detection on {@code text} and returns the sentence
 * strings.
 *
 * @param text the text to segment
 * @return the detected sentences, in document order
 * @throws Exception if the bundled model cannot be loaded
 */
private String[] testOpenNLP(String text) throws Exception {
  // Load the packaged English sentence model; try-with-resources closes the stream.
  try (InputStream modelStream = this.getClass().getResourceAsStream(RESOURCES_EN_SENT_BIN)) {
    SentenceDetectorME detector = new SentenceDetectorME(new SentenceModel(modelStream));
    return detector.sentDetect(text);
  }
}
// Train an English sentence model from the line-oriented sample stream.
SentenceModel sentdetectModel = SentenceDetectorME.train("eng",
    new SentenceSampleStream(new PlainTextByLineStream(in, StandardCharsets.UTF_8)),
    factory, mlParams);
SentenceDetectorME sentDetect = new SentenceDetectorME(sentdetectModel);

// NOTE: JUnit's Assert.assertEquals takes (expected, actual); the original
// had the arguments swapped, which produces misleading failure messages.
String[] sents = sentDetect.sentDetect(sampleSentences1);
Assert.assertEquals(2, sents.length);
Assert.assertEquals("This is a test.", sents[0]);
Assert.assertEquals("There are many tests, this is the second.", sents[1]);
double[] probs = sentDetect.getSentenceProbabilities();
Assert.assertEquals(2, probs.length);

sents = sentDetect.sentDetect(sampleSentences2);
Assert.assertEquals(2, sents.length);
probs = sentDetect.getSentenceProbabilities();
Assert.assertEquals(2, probs.length);
Assert.assertEquals("This is a test.", sents[0]);

sents = sentDetect.sentDetect(sampleSentences3);
Assert.assertEquals(2, sents.length);
probs = sentDetect.getSentenceProbabilities();
Assert.assertEquals(2, probs.length);
Assert.assertEquals("This is a \"test\".", sents[0]);

sents = sentDetect.sentDetect(sampleSentences4);
Assert.assertEquals(3, sents.length);
probs = sentDetect.getSentenceProbabilities();
Assert.assertEquals(3, probs.length);
/** * Starts the evaluation. * * @param samples * the data to train and test * @param nFolds * number of folds * * @throws IOException */ public void evaluate(ObjectStream<SentenceSample> samples, int nFolds) throws IOException { CrossValidationPartitioner<SentenceSample> partitioner = new CrossValidationPartitioner<>(samples, nFolds); while (partitioner.hasNext()) { CrossValidationPartitioner.TrainingSampleStream<SentenceSample> trainingSampleStream = partitioner.next(); SentenceModel model; model = SentenceDetectorME.train(languageCode, trainingSampleStream, sdFactory, params); // do testing SentenceDetectorEvaluator evaluator = new SentenceDetectorEvaluator( new SentenceDetectorME(model), listeners); evaluator.evaluate(trainingSampleStream.getTestSampleStream()); fmeasure.mergeInto(evaluator.getFMeasure()); } }
/** Delegates sentence boundary detection to the configured OpenNLP detector. */
@Override
protected Span[] detectSentences(String text) {
  Span[] sentenceSpans = sentenceDetector.sentPosDetect(text);
  return sentenceSpans;
}
/**
 * @deprecated Use
 *             {@link #train(String, ObjectStream, SentenceDetectorFactory, TrainingParameters)}
 *             and pass in a {@link SentenceDetectorFactory}.
 */
@Deprecated // annotation added to match the javadoc tag so the compiler warns callers
public static SentenceModel train(String languageCode,
    ObjectStream<SentenceSample> samples, boolean useTokenEnd,
    Dictionary abbreviations, TrainingParameters mlParams) throws IOException {
  // Wrap the legacy flags in a factory and delegate to the factory-based overload.
  SentenceDetectorFactory sdFactory = new SentenceDetectorFactory(
      languageCode, useTokenEnd, abbreviations, null);
  return train(languageCode, samples, sdFactory, mlParams);
}
// Initialize the sentence detector final SentenceDetectorME sdetector = EasyParserUtils .getOpenNLPSentDetector(Constants.SENTENCE_DETECTOR_DATA); // Initialize the parser final Parser parser = EasyParserUtils .getOpenNLPParser(Constants.PARSER_DATA_LOC); // Get sentences of the text final String sentences[] = sdetector.sentDetect(essay); // Go through the sentences and parse each for (final String sentence : sentences) { // Parse the sentence, produce only 1 parse final Parse[] parses = ParserTool.parseLine(sentence, parser, 10); if (parses.length == 0) { // Most probably this is code } else { // An English sentence } }
/** * Starts the evaluation. * * @param samples * the data to train and test * @param nFolds * number of folds * * @throws IOException */ public void evaluate(ObjectStream<SentenceSample> samples, int nFolds) throws IOException { CrossValidationPartitioner<SentenceSample> partitioner = new CrossValidationPartitioner<>(samples, nFolds); while (partitioner.hasNext()) { CrossValidationPartitioner.TrainingSampleStream<SentenceSample> trainingSampleStream = partitioner.next(); SentenceModel model; model = SentenceDetectorME.train(languageCode, trainingSampleStream, sdFactory, params); // do testing SentenceDetectorEvaluator evaluator = new SentenceDetectorEvaluator( new SentenceDetectorME(model), listeners); evaluator.evaluate(trainingSampleStream.getTestSampleStream()); fmeasure.mergeInto(evaluator.getFMeasure()); } }
/**
 * Detect sentences in a String.
 *
 * @param s The string to be processed.
 * @return A string array containing individual sentences as elements.
 */
public String[] sentDetect(String s) {
  // Reuse the span-based detector and materialize each covered region.
  Span[] spans = sentPosDetect(s);
  String[] sentences = new String[spans.length];
  for (int i = 0; i < spans.length; i++) {
    sentences[i] = spans[i].getCoveredText(s).toString();
  }
  // When no boundaries are found this is simply the empty array.
  return sentences;
}
/**
 * @deprecated Use
 *             {@link #train(String, ObjectStream, SentenceDetectorFactory, TrainingParameters)}
 *             and pass in a {@link SentenceDetectorFactory}.
 */
@Deprecated
public static SentenceModel train(String languageCode,
    ObjectStream<SentenceSample> samples, boolean useTokenEnd,
    Dictionary abbreviations) throws IOException {
  // Delegates to the five-argument overload with default training parameters.
  return train(languageCode, samples, useTokenEnd, abbreviations,
      ModelUtil.createDefaultTrainingParameters());
}
}
/**
 * Splits {@code text} into sentences using the configured OpenNLP detector.
 *
 * @param text the text to segment
 * @return a mutable list of the detected sentences, in document order
 */
@Override
public List<String> extractSentences(String text) {
  final String[] detected = sentenceDetector.sentDetect(text);
  return newArrayList(detected);
}
/** * Creates a new {@link BratNameSampleStream}. * @param sentModel a {@link SentenceModel} model * @param tokenModel a {@link TokenizerModel} model * @param samples a {@link BratDocument} {@link ObjectStream} * @param nameTypes the name types to use or null if all name types */ public BratNameSampleStream(SentenceModel sentModel, TokenizerModel tokenModel, ObjectStream<BratDocument> samples, Set<String> nameTypes) { super(samples); // TODO: We can pass in custom validators here ... this.parser = new BratDocumentParser(new SentenceDetectorME(sentModel), new TokenizerME(tokenModel), nameTypes); }
/**
 * Splits {@code sentenceSource} into {@link RawSentence}s carrying both the
 * sentence text and its character offsets in the source.
 *
 * @param sentenceSource the text to segment; null or empty yields an empty list
 * @return the detected sentences with their start/end offsets
 */
@Override
public List<RawSentence> tokenize( final String sentenceSource ) {
  // Guard: nothing to detect in a null/empty input.
  if ( Strings.isNullOrEmpty(sentenceSource) ) {
    return Collections.emptyList();
  }
  // NOTE(review): a detector is built per call; caching one per model may be
  // cheaper — confirm thread-safety requirements before changing.
  final SentenceDetectorME detector = new SentenceDetectorME(sentenceModel);
  final Span[] sentenceSpans = detector.sentPosDetect(sentenceSource);
  return Arrays.stream(sentenceSpans)
      .map(span -> {
        final String covered = span.getCoveredText(sentenceSource).toString();
        return new RawSentence(covered, span.getStart(), span.getEnd());
      })
      .collect(Collectors.toList());
}
}
// Sentence detector backed by the given maxent model.
SentenceDetectorME sdetector = new SentenceDetectorME(model);
// Read paragraphs until the stream is exhausted (read() returns null at EOF).
while ((para = paraStream.read()) != null) {
  // Split the paragraph into sentences and print each on its own line.
  String[] sents = sdetector.sentDetect(para);
  for (String sentence : sents) {
    System.out.println(sentence);
/** * Starts the evaluation. * * @param samples * the data to train and test * @param nFolds * number of folds * * @throws IOException */ public void evaluate(ObjectStream<SentenceSample> samples, int nFolds) throws IOException { CrossValidationPartitioner<SentenceSample> partitioner = new CrossValidationPartitioner<>(samples, nFolds); while (partitioner.hasNext()) { CrossValidationPartitioner.TrainingSampleStream<SentenceSample> trainingSampleStream = partitioner.next(); SentenceModel model; model = SentenceDetectorME.train(languageCode, trainingSampleStream, sdFactory, params); // do testing SentenceDetectorEvaluator evaluator = new SentenceDetectorEvaluator( new SentenceDetectorME(model), listeners); evaluator.evaluate(trainingSampleStream.getTestSampleStream()); fmeasure.mergeInto(evaluator.getFMeasure()); } }
/** Delegates sentence boundary detection to the configured OpenNLP detector. */
@Override
protected Span[] detectSentences(String text) {
  Span[] sentenceSpans = sentenceDetector.sentPosDetect(text);
  return sentenceSpans;
}
/**
 * Trains an English sentence model over the shared sample stream using the
 * supplied {@code factory} and default training parameters.
 */
private static SentenceModel train(SentenceDetectorFactory factory) throws IOException {
  TrainingParameters defaultParams = TrainingParameters.defaultParams();
  return SentenceDetectorME.train("eng", createSampleStream(), factory, defaultParams);
}
final String sentences[] = sdetector.sentDetect(essay);
/**
 * Initializes the current instance with the given context.
 * <p>
 * Note: Do all initialization in this method, do not use the constructor.
 */
public void initialize(UimaContext context) throws ResourceInitializationException {
  super.initialize(context);

  try {
    // Fetch the shared model resource registered under the model parameter key.
    SentenceModelResource modelResource = (SentenceModelResource) context
        .getResourceObject(UimaUtil.MODEL_PARAMETER);
    // Resource access failures are wrapped as initialization errors (cause kept).
    sentenceDetector = new SentenceDetectorME(modelResource.getModel());
  } catch (ResourceAccessException e) {
    throw new ResourceInitializationException(e);
  }
}