/**
 * Writes the tokens of the CAS to the temporary training file in the tab-separated
 * "token&lt;TAB&gt;tag" format expected by the Stanford POS trainer, one token per line,
 * sentences separated by a blank line.
 *
 * @param aJCas
 *            the CAS to append to the training data.
 * @throws AnalysisEngineProcessException
 *             if the temporary file cannot be created.
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException
{
    // Lazily create the temporary training file on the first CAS so no file is
    // produced when there is no input at all.
    if (tempData == null) {
        try {
            tempData = File.createTempFile("dkpro-stanford-pos-trainer", ".tsv");
            out = new PrintWriter(new OutputStreamWriter(new FileOutputStream(tempData),
                    StandardCharsets.UTF_8));
        }
        catch (IOException e) {
            throw new AnalysisEngineProcessException(e);
        }
    }

    Map<Sentence, Collection<Token>> index = indexCovered(aJCas, Sentence.class, Token.class);
    for (Sentence sentence : select(aJCas, Sentence.class)) {
        Collection<Token> tokens = index.get(sentence);
        for (Token token : tokens) {
            // Fail with a descriptive message instead of a bare NPE when a token has no
            // POS annotation — consistent with the other sample producers in this module.
            if (token.getPos() == null) {
                throw new IllegalStateException("Token [" + token.getText() + "] has no POS");
            }
            out.printf("%s\t%s%n", token.getText(), token.getPos().getPosValue());
        }
        // Blank line marks the sentence boundary in the training format.
        out.println();
    }
}
/**
 * Writes the tokens of the CAS to the temporary training file in the tab-separated
 * "token&lt;TAB&gt;tag" format expected by the ArkTweet POS trainer, one token per line,
 * sentences separated by a blank line.
 *
 * @param jCas
 *            the CAS to append to the training data.
 * @throws AnalysisEngineProcessException
 *             if the temporary file cannot be created.
 */
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException
{
    // Lazily create the temporary training file on the first CAS so no file is
    // produced when there is no input at all.
    if (tempData == null) {
        try {
            tempData = File.createTempFile("dkpro-arktweet-pos-trainer", ".tsv");
            out = new PrintWriter(new OutputStreamWriter(new FileOutputStream(tempData),
                    StandardCharsets.UTF_8));
        }
        catch (IOException e) {
            throw new AnalysisEngineProcessException(e);
        }
    }

    Map<Sentence, Collection<Token>> index = indexCovered(jCas, Sentence.class, Token.class);
    for (Sentence sentence : select(jCas, Sentence.class)) {
        Collection<Token> tokens = index.get(sentence);
        for (Token token : tokens) {
            // Fail with a descriptive message instead of a bare NPE when a token has no
            // POS annotation — consistent with the other sample producers in this module.
            if (token.getPos() == null) {
                throw new IllegalStateException("Token [" + token.getText() + "] has no POS");
            }
            out.printf("%s\t%s%n", token.getText(), token.getPos().getPosValue());
        }
        // Blank line marks the sentence boundary in the training format.
        out.println();
    }
}
/**
 * Renders a token as "text-POS", using the mapped POS value when the mapping table
 * knows the tag and the raw POS value otherwise.
 *
 * @param aToken
 *            the token to render.
 * @return the token text joined with its (possibly mapped) POS tag by a dash.
 */
@Override
public String getText(Token aToken)
{
    // Synchronize on the token's CAS — presumably because the CAS may be accessed
    // from multiple threads here; preserves the original locking contract.
    synchronized (aToken.getCAS()) {
        String rawPos = aToken.getPosValue();
        String mapped = mapping.get(rawPos);
        return aToken.getText() + "-" + (mapped != null ? mapped : rawPos);
    }
}
}
@Override public POSSample produce(JCas aJCas) { // Process present sentences Sentence sentence = sentences.next(); // Block on next call to read if (!sentences.hasNext()) { documentComplete(); } List<String> words = new ArrayList<>(); List<String> tags = new ArrayList<>(); for (Token t : selectCovered(Token.class, sentence)) { words.add(t.getText()); if (t.getPos() == null) { throw new IllegalStateException("Token [" + t.getText() + "] has no POS"); } tags.add(t.getPos().getPosValue()); } return new POSSample(words, tags); } }
@Override public ChunkSample produce(JCas aJCas) { // Process present sentences Sentence sentence = sentences.next(); // Block on next call to read if (!sentences.hasNext()) { documentComplete(); } List<String> words = new ArrayList<>(); List<String> tags = new ArrayList<>(); List<String> preds = new ArrayList<>(); for (Token t : selectCovered(Token.class, sentence)) { words.add(t.getText()); if (t.getPos() == null) { throw new IllegalStateException("Token [" + t.getText() + "] has no POS"); } tags.add(t.getPos().getPosValue()); preds.add(chunkEncoder.encode(t)); } return new ChunkSample(words, tags, preds); } }
// Default to the covered document text; when writeCovered is disabled, use the token's
// getText() form instead (presumably a normalized/override form — confirm against Token).
String form = row.token.getCoveredText(); if (!writeCovered) { form = row.token.getText();
// Copy the text of the j-th token (up to the sentence end) into the parallel array.
sentenceToEnd[j] = tokensToSentenceEnd.get(j).getText();
/**
 * Runs the ArkTweet POS tagger over a list of UIMA tokens.
 *
 * @param annotatedTokens
 *            the tokens of one sentence/tweet.
 * @param tweetTagModel
 *            the loaded ArkTweet model wrapper.
 * @return one {@link TaggedToken} per input token, pairing the original annotation with
 *         the predicted tag label.
 */
private List<TaggedToken> tagTweetTokens(List<Token> annotatedTokens, TweetTagger tweetTagModel)
{
    // ArrayList (presized) instead of LinkedList: purely sequential appends and reads,
    // no list-middle mutation anywhere below.
    List<String> tokens = new ArrayList<>(annotatedTokens.size());
    for (Token a : annotatedTokens) {
        // The tagger expects its own text normalization applied to every token.
        tokens.add(Twokenize.normalizeTextForTagger(a.getText()));
    }

    // Build the tagger-internal sentence representation, then run feature extraction
    // and greedy decoding on it.
    Sentence sentence = new Sentence();
    sentence.tokens = tokens;
    ModelSentence ms = new ModelSentence(sentence.T());
    tweetTagModel.featureExtractor.computeFeatures(sentence, ms);
    tweetTagModel.model.greedyDecode(ms, false);

    // Pair each original annotation with its predicted tag label.
    List<TaggedToken> taggedTokens = new ArrayList<>(sentence.T());
    for (int t = 0; t < sentence.T(); t++) {
        TaggedToken tt = new TaggedToken();
        tt.token = annotatedTokens.get(t);
        tt.tag = tweetTagModel.model.labelVocab.name(ms.labels[t]);
        taggedTokens.add(tt);
    }
    return taggedTokens;
}
}
// Default to the covered document text; when writeCovered is disabled, use the token's
// getText() form instead (presumably a normalized/override form — confirm against Token).
String form = row.token.getCoveredText(); if (!writeCovered) { form = row.token.getText();
// Detect separated verb particles via their STTS tag (PTKVZ and variants); remember the
// particle text so it can later be combined with the verb lemma (initialized empty here).
if (token.getPos().getPosValue().matches("PTKVZ.*")) { String particle = token.getText(); String verblemma = "";
// When the lemmatizer produced no lemma for position i, fall back to the token text.
Token token = tokens.get(i); if (lemmas[i] == null) { lemmas[i] = token.getText();
// Collect the token texts into a parallel array, rejecting any token whose POS
// annotation (or its tag value) is missing.
int i = 0; for (Token t : tokens) { tokenTexts[i] = t.getText(); if (t.getPos() == null || t.getPos().getPosValue() == null) { throw new IllegalStateException("Every token must have a POS tag.");
// Fill the parallel token/tag arrays for position i, then advance the index.
toks[i] = t.getText(); tags[i] = t.getPosValue(); i++;
/**
 * Annotates compound words: for every token, the splitter proposes decompositions, the
 * ranker picks the best one, and a {@link Compound} annotation with its indexed splits
 * is added whenever the best candidate is an actual compound.
 *
 * @param aJCas
 *            the CAS to process.
 * @throws AnalysisEngineProcessException
 *             declared by the interface; not thrown directly here.
 */
@Override
public void process(final JCas aJCas) throws AnalysisEngineProcessException
{
    for (Token token : select(aJCas, Token.class)) {
        DecompoundedWord best = ranker.highestRank(splitter.split(token.getText()));
        // Skip tokens whose best-ranked analysis is not a compound at all.
        if (!best.isCompound()) {
            continue;
        }
        int begin = token.getBegin();
        int end = token.getEnd();
        Compound compound = new Compound(aJCas, begin, end);
        indexSplits(aJCas, best.getSplits(), begin, end, null, compound);
        compound.addToIndexes();
    }
}
// Wrap the token text in a (so far untagged) TaggedWord for the tagger input.
words.add(new TaggedWord(t.getText()));
/**
 * Converts a UIMA {@link Token} into a Stanford CoreNLP {@link CoreLabel}, copying
 * text, character offsets, lemma and POS tag where available.
 *
 * @param aToken
 *            the token to convert.
 * @return the populated CoreLabel.
 */
public static CoreLabel tokenToWord(Token aToken)
{
    CoreLabel word = new CoreLabel();
    word.setOriginalText(aToken.getCoveredText());
    word.setWord(aToken.getText());
    word.setBeginPosition(aToken.getBegin());
    word.setEndPosition(aToken.getEnd());

    // Prefer the annotated lemma; fall back to the token text when there is none.
    word.setLemma(aToken.getLemma() != null ? aToken.getLemma().getValue()
            : aToken.getText());

    // The POS tag is only set when present — CoreLabel tolerates a missing tag.
    if (aToken.getPos() != null) {
        word.setTag(aToken.getPos().getPosValue());
    }
    return word;
}
/**
 * Process the sentence to create tokens.
 *
 * Word boundaries are found with a locale-aware {@link BreakIterator}; offsets returned
 * by the iterator are relative to {@code text}, so {@code zoneBegin} is added to map
 * them back into document coordinates.
 */
private void processSentence(JCas aJCas, String text, int zoneBegin)
{
    BreakIterator breaker = BreakIterator.getWordInstance(getLocale(aJCas));
    breaker.setText(text);

    int tokenBegin = breaker.first() + zoneBegin;
    for (int boundary = breaker.next(); boundary != BreakIterator.DONE;
            boundary = breaker.next()) {
        int tokenEnd = boundary + zoneBegin;
        Token token = createToken(aJCas, tokenBegin, tokenEnd);
        // Optionally split a token at its first inner apostrophe: the original token is
        // truncated to end there and a second token covers the remainder. An apostrophe
        // at position 0 does not trigger a split.
        if (token != null && splitAtApostrophe) {
            int apostrophe = token.getText().indexOf("'");
            if (apostrophe > 0) {
                int splitAt = token.getBegin() + apostrophe;
                createToken(aJCas, splitAt, token.getEnd());
                token.setEnd(splitAt);
            }
        }
        tokenBegin = tokenEnd;
    }
}
}
// Build the parser node for position i (DEPNode ids are 1-based) carrying the token
// text and POS tag; the lemma is copied below only when one is annotated.
DEPNode node = new DEPNode(i + 1, tokens.get(i).getText()); node.pos = t.getPos().getPosValue(); if (t.getLemma() != null) {