/**
 * Detects sentence boundary spans in the given text using the bundled
 * English sentence model loaded from the classpath.
 *
 * @param text the text to segment into sentences
 * @return the character spans of the detected sentences
 * @throws Exception if the model resource cannot be read or parsed
 */
private Span[] testOpenNLPPosition(String text) throws Exception {
    try (InputStream modelIn = this.getClass().getResourceAsStream(RESOURCES_EN_SENT_BIN)) {
        // getResourceAsStream returns null when the resource is absent; fail fast
        // with a clear message instead of an NPE inside the SentenceModel constructor.
        if (modelIn == null) {
            throw new IllegalStateException(
                    "Sentence model resource not found on classpath: " + RESOURCES_EN_SENT_BIN);
        }
        SentenceModel model = new SentenceModel(modelIn);
        SentenceDetectorME sentenceDetector = new SentenceDetectorME(model);
        return sentenceDetector.sentPosDetect(text);
    }
}
/**
 * Detects sentence boundaries in {@code text} by delegating to the
 * configured OpenNLP {@code sentenceDetector}.
 *
 * @param text the text to segment
 * @return the character spans of the detected sentences
 */
@Override
protected Span[] detectSentences(String text) {
    return sentenceDetector.sentPosDetect(text);
}
/**
 * Detect sentences in a String.
 *
 * @param s The string to be processed.
 * @return A string array containing individual sentences as elements;
 *         empty if no sentences were detected.
 */
public String[] sentDetect(String s) {
    Span[] spans = sentPosDetect(s);
    // new String[0] is already an empty array, so the previous
    // length-zero special case was redundant and has been removed.
    String[] sentences = new String[spans.length];
    for (int si = 0; si < spans.length; si++) {
        sentences[si] = spans[si].getCoveredText(s).toString();
    }
    return sentences;
}
/**
 * Detects sentence boundaries in {@code text} by delegating to the
 * configured OpenNLP {@code sentenceDetector}.
 *
 * @param text the text to segment
 * @return the character spans of the detected sentences
 */
@Override
protected Span[] detectSentences(String text) {
    return sentenceDetector.sentPosDetect(text);
}
public List<Integer> handleParagraph(String paragraph, int paragraphStart, List<Integer> sentenceSplits) { if (paragraphStart > 0) sentenceSplits.add(paragraphStart) ; Span[] spans = sentenceDetector.sentPosDetect(paragraph) ; for (int spanIndex = 0 ; spanIndex < spans.length - 1 ; spanIndex++) { //add splits for all spans except for the last one (that split gets handled at the paragraph level Span span = spans[spanIndex] ; sentenceSplits.add(paragraphStart + span.getEnd()) ; //System.out.println(" - " + (span.getStart() + paragraphStart) + "," + (span.getEnd() + paragraphStart)); } return sentenceSplits ; }
/**
 * Detect sentences in a String.
 *
 * @param s The string to be processed.
 * @return A string array containing individual sentences as elements;
 *         empty if no sentences were detected.
 */
public String[] sentDetect(String s) {
    Span[] spans = sentPosDetect(s);
    // new String[0] is already an empty array, so the previous
    // length-zero special case was redundant and has been removed.
    String[] sentences = new String[spans.length];
    for (int si = 0; si < spans.length; si++) {
        sentences[si] = spans[si].getCoveredText(s).toString();
    }
    return sentences;
}
/**
 * Detect sentences in a String.
 *
 * @param s The string to be processed.
 * @return A string array containing individual sentences as elements.
 */
public String[] sentDetect(String s) {
    final Span[] spans = sentPosDetect(s);
    // Guard clause for the no-sentence case keeps the main path flat.
    if (spans.length == 0) {
        return new String[] {};
    }
    String[] sentences = new String[spans.length];
    for (int i = 0; i < sentences.length; i++) {
        sentences[i] = spans[i].getCoveredText(s).toString();
    }
    return sentences;
}
/**
 * Splits {@code sentenceSource} into raw sentences, each carrying its
 * covered text and start/end character offsets.
 *
 * @param sentenceSource the text to segment; null or empty yields an empty list
 * @return the detected sentences, in document order
 */
@Override
public List<RawSentence> tokenize(final String sentenceSource) {
    if (Strings.isNullOrEmpty(sentenceSource)) {
        return Collections.emptyList();
    }
    final SentenceDetectorME detector = new SentenceDetectorME(sentenceModel);
    final Span[] spans = detector.sentPosDetect(sentenceSource);
    return Arrays.stream(spans)
            .map(span -> {
                final String covered = span.getCoveredText(sentenceSource).toString();
                return new RawSentence(covered, span.getStart(), span.getEnd());
            })
            .collect(Collectors.toList());
}
}
/**
 * Reads all remaining text from {@code sentenceSource} and splits it into
 * raw sentences, each carrying its covered text and character offsets.
 *
 * @param sentenceSource reader supplying the text to segment; read to exhaustion
 *        (not closed by this method)
 * @return list of found sentences, in document order
 * @throws IOException if reading from the source fails
 */
public List<RawSentence> tokenize(final Reader sentenceSource) throws IOException {
    final StringBuilder textBuilder = new StringBuilder(textBufferSize);
    final char[] buffer = new char[readBufferSize];
    // Drain the reader; read() returns -1 at end of stream. This replaces the
    // previous do/while(true)-with-break form with the idiomatic read loop.
    int read;
    while ((read = sentenceSource.read(buffer)) != -1) {
        textBuilder.append(buffer, 0, read);
    }
    final String text = textBuilder.toString();
    final Span[] sentenceSpans = new SentenceDetectorME(sentenceModel).sentPosDetect(text);
    return Arrays.stream(sentenceSpans)
            .map(span -> new RawSentence(span.getCoveredText(text).toString(), span.getStart(), span.getEnd()))
            .collect(Collectors.toList());
}
/**
 * Groups a flat token list into sentences using an OpenNLP sentence splitter
 * chosen by the detected language of the reconstructed text.
 *
 * @param tokens the tokens to group; their begin offsets must correspond to
 *        positions in the text produced by {@code WordHelpers.tokensToText}
 * @return one {@code Sentence} per detected span, each holding the tokens
 *         whose begin offset falls before the span's end
 */
public List<Sentence> createSentencesFromTokens(List<Token> tokens) {
    List<Sentence> result = new ArrayList<>();
    String text = WordHelpers.tokensToText(tokens, 0);
    String lang = detectLanguage(text);
    // find best Tokenizer and Splitter for text
    SentenceDetectorME ssplit = sentenceSplitter.getOrDefault(lang, sentenceSplitter.get(LANG_EN));
    opennlp.tools.util.Span sentences[] = ssplit.sentPosDetect(text);
    // Tokenize sentences
    Iterator<Token> tokenIt = tokens.iterator();
    if(!tokenIt.hasNext()) return result;
    Token currentToken = tokenIt.next();
    for(opennlp.tools.util.Span sentence : sentences) {
        if(sentence == null) continue;
        List<Token> tokenList = new ArrayList<>();
        // Consume tokens until the cursor passes this sentence's end offset.
        // The token cursor is shared across iterations, so each token is
        // assigned to exactly one sentence.
        while(currentToken.getBegin() < sentence.getEnd()) {
            // Newline pseudo-tokens are dropped rather than attached to a sentence.
            if(!currentToken.getText().equals("\n")) {
                tokenList.add(currentToken);
            }
            if(!tokenIt.hasNext()) break;
            currentToken = tokenIt.next();
        }
        result.add(new Sentence(tokenList));
    }
    return result;
}
/**
 * Splits the document's text into sentences and attaches them to the document.
 *
 * @throws IllegalArgumentException
 *             if document text is null.
 */
public void analyze(Document document) {
    if (document.getText() == null) {
        throw new IllegalArgumentException("Document text is null.");
    }
    // The detector instance is shared, so serialize access to it.
    Span[] sentenceSpans;
    synchronized (sentenceDetector) {
        sentenceSpans = sentenceDetector.sentPosDetect(document.getText());
    }
    List<Sentence> sentences = new ArrayList<Sentence>(sentenceSpans.length);
    for (Span span : sentenceSpans) {
        sentences.add(new SentenceImpl(span.getStart(), span.getEnd(), document));
    }
    document.setSentences(sentences);
}
}
/**
 * Splits the document's text into sentences and stores them on the document.
 *
 * @throws IllegalArgumentException
 *             if document text is null.
 */
public void analyze(Document document) {
    if (document.getText() == null)
        throw new IllegalArgumentException("Document text is null.");
    Span[] spans;
    // The detector instance is shared, so detection is serialized on it.
    synchronized (sentenceDetector) {
        spans = sentenceDetector.sentPosDetect(document.getText());
    }
    List<Sentence> sentences = new ArrayList<Sentence>(spans.length);
    for (int i = 0; i < spans.length; i++) {
        // Each span's start/end offsets index into the document's text.
        Sentence sentence = new SentenceImpl(spans[i].getStart(), spans[i].getEnd(), document);
        sentences.add(sentence);
    }
    document.setSentences(sentences);
}
}
/**
 * Sentence splitter: segments {@code text} and returns one {@link ISentence}
 * per detected sentence, carrying its start/end offsets and covered text.
 *
 * @param text the text to split into sentences
 * @return the detected sentences, in document order
 * @throws IOException if loading the sentence model fails
 */
public List<ISentence> getSentencesText(String text) throws IOException {
    SentenceDetectorME sentenceDetector = new SentenceDetectorME(getSentenceModel());
    // Java-style array declaration; getStart()/getEnd() already return int,
    // so the previous (int) casts were redundant and have been removed.
    Span[] sentences = sentenceDetector.sentPosDetect(text);
    List<ISentence> sents = new ArrayList<ISentence>();
    for (Span sent : sentences) {
        SentenceImpl sen = new SentenceImpl(sent.getStart(), sent.getEnd(),
                text.substring(sent.getStart(), sent.getEnd()));
        sents.add(sen);
    }
    return sents;
}
// NOTE(review): fragment — the enclosing method and the if-body are not
// visible in this excerpt.
for (opennlp.tools.util.Span onlpSpan : sentenceDetector.sentPosDetect(segmentText)) {
    // Shift the OpenNLP span (relative to segmentText) into the coordinates of
    // the full text — presumably by the segment's start index; confirm against
    // offsetRightByStartIndex's contract.
    Span span = new Span(onlpSpan.getStart(), onlpSpan.getEnd()).offsetRightByStartIndex(segment);
    // Only spans covering at least one non-whitespace character pass the filter.
    if (Patterns.NON_WHITESPACE.matcher(span.coveredString(text)).find()) {
// Detect sentence boundary spans (start/end character offsets) in the plain text.
Span sentenceSpans[] = sentenceDetector.sentPosDetect(plainText);
// Detect sentence boundary spans (start/end character offsets) in the plain text.
Span sentenceSpans[] = sentenceDetector.sentPosDetect(plainText);
// NOTE(review): fragment — the loop body continues beyond this excerpt, so the
// role of 'index' (a second cursor advanced alongside 'i') cannot be confirmed here.
Span[] sentences = sentenceDetector.sentPosDetect(content);
for (int index = 0, i = 0; i < sentences.length; i++) {
    int start = sentences[i].getStart();
// Pick the sentence splitter registered for the detected language,
// falling back to the English splitter when none is registered.
SentenceDetectorME ssplit = sentenceSplitter.getOrDefault(lang, sentenceSplitter.get(LANG_EN));
opennlp.tools.util.Span sentences[] = ssplit.sentPosDetect(text);
/**
 * Annotates the zone [aZoneBegin, aZoneBegin + aText.length()) of the CAS:
 * writes sentence annotations when enabled, then token annotations within
 * each covered sentence when enabled.
 *
 * @param aJCas the CAS to annotate
 * @param aText the text of the zone being processed
 * @param aZoneBegin the zone's start offset within the CAS document text
 * @throws AnalysisEngineProcessException on analysis failure
 */
@Override
protected void process(JCas aJCas, String aText, int aZoneBegin) throws AnalysisEngineProcessException {
    if (isWriteSentence()) {
        // Sentence spans are relative to aText; shift them into CAS coordinates.
        for (Span sentenceSpan : sentenceModelProvider.getResource().sentPosDetect(aText)) {
            createSentence(aJCas, sentenceSpan.getStart() + aZoneBegin, sentenceSpan.getEnd() + aZoneBegin);
        }
    }
    if (isWriteToken()) {
        for (Sentence sentence : selectCovered(aJCas, Sentence.class, aZoneBegin, aZoneBegin + aText.length())) {
            // Token spans are relative to the sentence text; shift by the sentence begin.
            for (Span tokenSpan : tokenModelProvider.getResource().tokenizePos(sentence.getCoveredText())) {
                createToken(aJCas, tokenSpan.getStart() + sentence.getBegin(), tokenSpan.getEnd() + sentence.getBegin());
            }
        }
    }
}
}
// NOTE(review): fragment — 'probs' and the enclosing test method are not visible here.
Span[] pos = sentDetect.sentPosDetect(sampleSentences2);
// NOTE(review): JUnit's Assert.assertEquals takes (expected, actual); the
// arguments here appear swapped (pos.length is the actual value), which makes
// failure messages misleading — verify and swap if this is JUnit's Assert.
Assert.assertEquals(pos.length,2);
probs = sentDetect.getSentenceProbabilities();