/**
 * Creates a Sentence in the given JCas and sets its begin and end offsets.
 *
 * @generated
 * @param jcas JCas to which this Feature Structure belongs
 * @param begin offset to the begin spot in the SofA
 * @param end offset to the end spot in the SofA
 */
public Sentence(JCas jcas, int begin, int end) {
    super(jcas);
    setBegin(begin);
    setEnd(end);
    // readObject() is defined outside this view; it is invoked last, after both offsets are set.
    // NOTE(review): presumably the post-construction initialization hook - confirm.
    readObject();
}
/** * the process method is in charge of doing the tokenization */ @Override public void process(final JCas aJCas) throws AnalysisEngineProcessException { LOGGER.debug("process() - starting processing document"); tokenNumber = 1; // if useCompleteDocText is true, tokenize complete documentText if (useCompleteDocText) { LOGGER.debug("process() - tokenizing whole document text!"); final String text = aJCas.getDocumentText(); writeTokensToCAS(text, 0, aJCas); } // if useCompleteDocText is false, tokenize sentence per sentence else { final JFSIndexRepository indexes = aJCas.getJFSIndexRepository(); final Iterator<Annotation> sentenceIter = indexes.getAnnotationIndex(Sentence.type).iterator(); while (sentenceIter.hasNext()) { final Sentence sentence = (Sentence) sentenceIter.next(); LOGGER.debug("process() - going to next sentence having length: " + (sentence.getEnd() - sentence .getBegin())); final String text = sentence.getCoveredText(); writeTokensToCAS(text, sentence.getBegin(), aJCas); } } }
LOGGER.debug("process() - original sentence: " + sentence.getCoveredText()); final StringBuffer unitS = new StringBuffer(); for (final Unit unit : unitSentence.getUnits()) {
/**
 * Tokenizes and POS-tags the given text using the shared {@code jCas}.
 * <p>
 * The CAS is reset, the text becomes its document text, and one {@link Sentence} spanning the
 * whole text is indexed. Then {@code jtbd} and {@code pennbioIEPosTagger} are run on the CAS,
 * in that order.
 *
 * @param text the text to tokenize; it replaces the current contents of the shared CAS
 * @return one two-element array per {@link Token} found in the CAS afterwards: the token's
 *         covered text at index 0 and the value of its first POS tag at index 1
 * @throws AnalysisEngineProcessException if one of the two {@code process} calls fails
 */
private List<String[]> tokenize(String text) throws AnalysisEngineProcessException {
    // Start from a clean CAS that holds nothing but the given text.
    jCas.reset();
    jCas.setDocumentText(text);

    // The whole input is treated as a single sentence.
    final Sentence wholeText = new Sentence(jCas, 0, text.length());
    wholeText.addToIndexes();

    jtbd.process(jCas.getCas());
    pennbioIEPosTagger.process(jCas.getCas());

    return JCasUtil.select(jCas, Token.class)
            .stream()
            .map(token -> {
                final String surface = token.getCoveredText();
                final String posValue = token.getPosTag(0).getValue();
                return new String[] { surface, posValue };
            })
            .collect(Collectors.toList());
}
/**
 * Internal - constructor used by generator.
 * Passes the low-level reference and type on to the superclass constructor.
 *
 * @generated
 * @param addr low level Feature Structure reference
 * @param type the type of this Feature Structure
 */
public Sentence(int addr, TOP_Type type) {
    super(addr, type);
    // readObject() is defined outside this view.
    // NOTE(review): presumably the post-construction initialization hook - confirm.
    readObject();
}
abbreviationIndex, tokenIndex); LOGGER.debug("process() - original sentence: " + sentence.getCoveredText()); StringBuffer unitS = new StringBuffer(); for (Unit unit : unitSentence.getUnits()) {
/**
 * Creates a Sentence in the given JCas; this constructor does not set begin or end offsets.
 *
 * @generated
 * @param jcas JCas to which this Feature Structure belongs
 */
public Sentence(JCas jcas) {
    super(jcas);
    // readObject() is defined outside this view.
    // NOTE(review): presumably the post-construction initialization hook - confirm.
    readObject();
}