de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token Java code examples

/**
 * Pairs each token with the prediction at the same list position, creates a POS
 * annotation over the token's span from that prediction's outcome, and attaches
 * it to the token.
 *
 * @param aJCas the CAS holding the tokens and predictions
 */
private void annotateTaggingResultsLinkToTokens(JCas aJCas)
{
  List<Token> tokenList = getTokens(aJCas);
  List<TextClassificationOutcome> predictions = getPredictions(aJCas);

  int position = 0;
  for (Token current : tokenList) {
    // Predictions are looked up by the token's position in the token list.
    String label = predictions.get(position).getOutcome();
    POS tag = createPartOfSpeechAnnotationFromOutcome(aJCas, current.getBegin(),
        current.getEnd(), label);
    current.setPos(tag);
    position++;
  }
}

/**
 * Renders a token as its covered text followed by its begin and end offsets.
 *
 * @param t the token to render
 * @return a string of the form {@code text[begin:end]}
 */
private static String outToken(Token t) {
  String text = t.getCoveredText();
  int begin = t.getBegin();
  int end = t.getEnd();
  return String.format("%s[%s:%s]", text, begin, end);
}

/**
 * Builds a {@code CoreLabel} from a token: original text, word, offsets, lemma
 * and (when present) the POS tag.
 *
 * @param aToken the token to convert
 * @return a new label populated from the token
 */
public static CoreLabel tokenToWord(Token aToken)
{
  CoreLabel label = new CoreLabel();

  label.setOriginalText(aToken.getCoveredText());
  label.setWord(aToken.getText());
  label.setBeginPosition(aToken.getBegin());
  label.setEndPosition(aToken.getEnd());

  // Without a lemma annotation the token text stands in as the lemma.
  label.setLemma(aToken.getLemma() != null
      ? aToken.getLemma().getValue()
      : aToken.getText());

  // The tag is left unset when the token carries no POS annotation.
  if (aToken.getPos() == null) {
    return label;
  }
  label.setTag(aToken.getPos().getPosValue());
  return label;
}

/**
 * Returns the form value when one is set, otherwise the covered text.
 *
 * @return the token text taking into account a {@link TokenForm} annotation that might exist.
 */
public String getText()
{
  String formValue = getFormValue();
  if (formValue == null) {
    return getCoveredText();
  }
  return formValue;
}

/**
 * Creates a token over the given span and adds it to the CAS indexes.
 *
 * @param aJCas the CAS to create the token in
 * @param begin begin offset of the token
 * @param end end offset of the token
 */
protected void setToken(JCas aJCas, int begin, int end)
{
  new Token(aJCas, begin, end).addToIndexes();
}

/**
 * Finds the token in the given list (e.g. the tokens of a sentence) whose begin and
 * end offsets both match the requested position.
 *
 * @param aTokens the candidate tokens
 * @param aBegin the required begin offset
 * @param aEnd the required end offset
 * @return the first token with exactly these offsets
 * @throws IllegalStateException if no token in the list has these offsets
 */
private Token getToken(List<Token> aTokens, int aBegin, int aEnd)
{
  for (Token candidate : aTokens) {
    if (candidate.getBegin() != aBegin || candidate.getEnd() != aEnd) {
      continue;
    }
    return candidate;
  }
  throw new IllegalStateException("Token not found");
}

  /**
   * Creates a token starting at the given text position and, if POS reading is
   * enabled, a POS annotation over the same span that is attached to the token.
   * Does nothing when token reading is disabled.
   *
   * @param aJCas the CAS to annotate
   * @param aToken the token text; its length determines the token's end offset
   * @param aTag the POS tag for the token
   * @param aCurrPosInText the begin offset of the token
   */
  private void annotateTokenWithTag(JCas aJCas, String aToken, String aTag, int aCurrPosInText)
  {
    if (!readToken) {
      return;
    }

    int tokenEnd = aCurrPosInText + aToken.length();
    Token token = new Token(aJCas, aCurrPosInText, tokenEnd);
    token.addToIndexes();

    if (!readPOS) {
      return;
    }

    // The annotation type is resolved from the tag via the mapping provider.
    Type tagType = posMappingProvider.getTagType(aTag);
    POS posAnno = (POS) aJCas.getCas().createAnnotation(tagType, token.getBegin(),
        token.getEnd());
    posAnno.setPosValue(aTag);
    POSUtils.assignCoarseValue(posAnno);
    posAnno.addToIndexes();

    token.setPos(posAnno);
  }
}

// Excerpt: store the text and POS value of t at slot i of the parallel arrays, then advance i.
toks[i] = t.getText();
tags[i] = t.getPosValue();
i++;
// Create a Lemma over t's span with the value lemmas[n], index it, link it to t, then advance n.
// NOTE(review): the declarations of t, i, n, toks, tags and lemmas are outside this excerpt.
Lemma lemmaAnno = new Lemma(aJCas, t.getBegin(), t.getEnd());
lemmaAnno.setValue(lemmas[n]);
lemmaAnno.addToIndexes();
t.setLemma(lemmaAnno);
n++;

/**
 * Converts every lemma of the corpus' lemmas layer into a {@code Lemma} annotation.
 * Each annotation takes the span of the first token the lemma refers to and is
 * attached to that token. Does nothing when the corpus has no lemmas layer.
 *
 * @param aJCas the CAS to create the lemma annotations in
 * @param aCorpusData the corpus providing the lemmas layer
 * @param aTokens already converted tokens, keyed by the ID of the corpus token
 */
private void convertLemma(JCas aJCas, TextCorpus aCorpusData, Map<String, Token> aTokens) {
  if (aCorpusData.getLemmasLayer() != null) {
    for (int idx = 0; idx < aCorpusData.getLemmasLayer().size(); idx++) {
      eu.clarin.weblicht.wlfxb.tc.api.Token[] covered = aCorpusData.getLemmasLayer()
          .getTokens(aCorpusData.getLemmasLayer().getLemma(idx));
      String lemmaText = aCorpusData.getLemmasLayer().getLemma(idx).getString();

      Lemma lemmaAnno = new Lemma(aJCas);
      // Only the first token referenced by the lemma determines span and attachment.
      Token anchor = aTokens.get(covered[0].getID());
      lemmaAnno.setBegin(anchor.getBegin());
      lemmaAnno.setEnd(anchor.getEnd());
      lemmaAnno.setValue(lemmaText);
      lemmaAnno.addToIndexes();

      anchor.setLemma(lemmaAnno);
    }
  }
}

/**
 * Re-creates every paragraph and token of the source CAS at the same offsets in
 * the target CAS and adds the copies to the target's indexes.
 *
 * @param source the CAS to read paragraphs and tokens from
 * @param target the CAS to create the copies in
 * @throws IllegalArgumentException if the two CASes do not have equal document text
 */
private static void copyParagraphAndTokenAnnotations(JCas source, JCas target)
{
  boolean sameText = source.getDocumentText().equals(target.getDocumentText());
  if (!sameText) {
    throw new IllegalArgumentException("Source and target have different content");
  }

  for (Paragraph original : JCasUtil.select(source, Paragraph.class)) {
    new Paragraph(target, original.getBegin(), original.getEnd()).addToIndexes();
  }

  for (Token original : JCasUtil.select(source, Token.class)) {
    new Token(target, original.getBegin(), original.getEnd()).addToIndexes();
  }
}

/**
 * Reports an ERROR message for every token without a POS annotation, then for
 * every token whose POS annotation has no POS value.
 *
 * @param aJCas the CAS to inspect
 * @param aMessages the list the messages are appended to
 * @return {@code true} if {@code aMessages} contains at least one ERROR-level
 *         message after the check
 */
@Override
public boolean check(JCas aJCas, List<Message> aMessages)
{
  // First pass: tokens that carry no POS annotation at all.
  for (Token tok : select(aJCas, Token.class)) {
    if (tok.getPos() != null) {
      continue;
    }
    String text = String.format("Token has no POS: %s [%d..%d]",
        tok.getType().getName(), tok.getBegin(), tok.getEnd());
    aMessages.add(new Message(this, ERROR, text));
  }

  // Second pass: tokens whose POS annotation exists but has no value.
  for (Token tok : select(aJCas, Token.class)) {
    if (tok.getPos() == null || tok.getPos().getPosValue() != null) {
      continue;
    }
    String text = String.format("Token has no POS value: %s [%d..%d]",
        tok.getType().getName(), tok.getBegin(), tok.getEnd());
    aMessages.add(new Message(this, ERROR, text));
  }

  for (Message msg : aMessages) {
    if (msg.level == ERROR) {
      return true;
    }
  }
  return false;
}

  /**
   * Callback invoked per token: optionally creates a POS annotation and a Lemma
   * annotation over the token's span, links them to the token and records them
   * at the current counter position. The counter is advanced once per call.
   * All work happens while holding the lock on {@code cas}.
   *
   * @param aToken the token being processed
   * @param aPos the POS tag, or {@code null} if none is available
   * @param aLemma the lemma, or {@code null} if none is available
   */
  @Override
  public void token(Token aToken, String aPos, String aLemma)
  {
    synchronized (cas) {
      // Part of speech
      if (writePos && aPos != null) {
        Type tagType = posMappingProvider.getTagType(aPos);
        int begin = aToken.getBegin();
        int end = aToken.getEnd();
        POS tagAnno = (POS) cas.createAnnotation(tagType, begin, end);
        tagAnno.setPosValue(aPos.intern());
        POSUtils.assignCoarseValue(tagAnno);
        aToken.setPos(tagAnno);
        pos[count.get()] = tagAnno;
      }

      // Lemma
      if (writeLemma && aLemma != null) {
        int begin = aToken.getBegin();
        int end = aToken.getEnd();
        Lemma baseForm = new Lemma(aJCas, begin, end);
        baseForm.setValue(aLemma.intern());
        aToken.setLemma(baseForm);
        lemma[count.get()] = baseForm;
      }

      count.getAndIncrement();
    }
  }
});

// Excerpt: attach one annotation of each kind (lemma, morphological features, POS, stem)
// to token t1. Each annotation spans t1's offsets, is added to the indexes and is then
// linked from the token via the matching setter.
Lemma l1 = new Lemma(jcas, t1.getBegin(), t1.getEnd());
l1.setValue("lemma1");
l1.addToIndexes();
t1.setLemma(l1);
// Morphological features: both the raw value and the tense feature are set.
MorphologicalFeatures m1 = new MorphologicalFeatures(jcas, t1.getBegin(), t1.getEnd());
m1.setValue("morph");
m1.setTense("tense1");
m1.addToIndexes();
t1.setMorph(m1);
// Part of speech
POS p1 = new POS(jcas, t1.getBegin(), t1.getEnd());
p1.setPosValue("pos1");
p1.addToIndexes();
t1.setPos(p1);
// Stem
Stem s1 = new Stem(jcas, t1.getBegin(), t1.getEnd());
s1.setValue("stem1");
s1.addToIndexes();
t1.setStem(s1);

/**
 * Collects the lower-cased lemma value and the POS value of every token in the CAS
 * into two parallel lists and passes them to the two-list overload of
 * {@code getSubstitutions}.
 *
 * <p>A token is skipped (with a message on {@code System.err}) when its lemma, its
 * lemma value, or its POS annotation is missing. Skipping the token as a whole keeps
 * the two lists aligned index by index.
 *
 * @param jcas the CAS to read the tokens from
 * @return the result of the two-list overload for the collected lemmas and POS values
 */
public List<String> getSubstitutions(JCas jcas)
{
  List<String> tokens = new ArrayList<String>();
  List<String> postags = new ArrayList<String>();

  for (Token t : JCasUtil.select(jcas, Token.class)) {
    // Check explicitly instead of catching NullPointerException: an NPE raised by a
    // missing POS after the lemma was already added would leave the lists misaligned.
    if (t.getLemma() == null || t.getLemma().getValue() == null) {
      System.err.println("Couldn't read lemma value for token \"" + t.getCoveredText() + "\"");
      continue;
    }
    if (t.getPos() == null) {
      System.err.println("Couldn't read POS value for token \"" + t.getCoveredText() + "\"");
      continue;
    }

    tokens.add(t.getLemma().getValue().toLowerCase());
    postags.add(t.getPos().getPosValue());
  }

  return getSubstitutions(tokens, postags);
}

/**
 * For every compound, creates one token per split part (as returned by
 * {@code getSplitsWithoutMorpheme} for the configured split level) and adds it to
 * the indexes. Each new token shares the POS of the first token covered by the
 * compound's span.
 *
 * @param aJCas the CAS to process
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void process(final JCas aJCas)
  throws AnalysisEngineProcessException
{
  for (Compound compound : JCasUtil.select(aJCas, Compound.class)) {
    // The first token covered by the compound donates its POS to all parts.
    final Token compoundToken = JCasUtil.selectCovered(aJCas, Token.class,
        compound.getBegin(), compound.getEnd()).get(0);

    for (Split part : compound.getSplitsWithoutMorpheme(compoundSplitLevel)) {
      Token partToken = new Token(aJCas, part.getBegin(), part.getEnd());
      partToken.setPos(compoundToken.getPos());
      partToken.addToIndexes();
    }
  }
}

/**
 * Builds a map from each dependent token to its dependency relation and governor.
 *
 * <p>Format: key is {@code begin ### word ### pos}; value is
 * {@code dep_rel ## begin ### word ### pos} of the governor. Every {@code #} in a
 * word is escaped as {@code \#}. Illustrative entries:
 * <pre>
 * "1 ### The ### Det"    -> "DET ## 2 ### dog ### N"
 * "2 ### dog ### N"      -> "SUBJ ## 3 ### chases ### V"
 * "3 ### chases ### V"   -> "ROOT ## 0 ### NULL ### NULL"
 * "4 ### The ### Det"    -> "DET ## 5 ### cat ### N"
 * "5 ### cat ### N"      -> "OBJ ## 3 ### chases ### V"
 * </pre>
 *
 * @param text the CAS holding the dependency annotations
 * @return the map described above, one entry per distinct key
 */
protected Map<String, String> indexDepTree(JCas text) {
  Map<String, String> index = new HashMap<String, String>();
  for (Dependency relation : JCasUtil.select(text, Dependency.class)) {
    Token dependent = relation.getDependent();
    Token governor = relation.getGovernor();

    String key = dependent.getBegin() + " ### "
        + dependent.getCoveredText().replace("#", "\\#") + " ### "
        + dependent.getPos().getPosValue();
    String value = relation.getDependencyType()
        + " ## " + governor.getBegin() + " ### "
        + governor.getCoveredText().replace("#", "\\#") + " ### "
        + governor.getPos().getPosValue();

    index.put(key, value);
  }
  return index;
}

/**
 * Removes from the CAS indexes every token whose covered text is not contained in
 * {@code vocab}, together with the token's POS annotation if it has one, and counts
 * each removed token in {@code droppedVocabulary}. Does nothing when
 * {@code embedding} is {@code null}.
 *
 * @param aJCas the CAS to prune
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException
{
  if (embedding == null) {
    return;
  }

  // Iterate over a snapshot: removing annotations from the indexes while walking the
  // live select() view modifies the index under its own iterator, which can fail with
  // a ConcurrentModificationException.
  Token[] snapshot = JCasUtil.select(aJCas, Token.class).toArray(new Token[0]);
  for (Token t : snapshot) {
    if (vocab.contains(t.getCoveredText())) {
      continue;
    }
    POS pos = t.getPos();
    if (pos != null) {
      pos.removeFromIndexes();
      t.setPos(null);
    }
    t.removeFromIndexes();
    droppedVocabulary++;
  }
}

  /**
   * Builds a map from each dependent token to its dependency relation and governor,
   * using lemma values as the word part.
   *
   * <p>Key: {@code begin ### lemma ### pos} of the dependent; value:
   * {@code dep_rel ## begin ### lemma ### pos} of the governor. Every {@code #} in a
   * lemma is escaped as {@code \#}.
   *
   * @param text the CAS holding the dependency annotations
   * @return the map described above, one entry per distinct key
   */
  protected Map<String, String> indexLemmaDepTree(JCas text) {
    Map<String, String> index = new HashMap<String, String>();

    for (Dependency relation : JCasUtil.select(text, Dependency.class)) {
      Token dependent = relation.getDependent();
      Token governor = relation.getGovernor();

      String key = dependent.getBegin() + " ### "
          + dependent.getLemma().getValue().replace("#", "\\#") + " ### "
          + dependent.getPos().getPosValue();
      String value = relation.getDependencyType()
          + " ## " + governor.getBegin() + " ### "
          + governor.getLemma().getValue().replace("#", "\\#")
          + " ### " + governor.getPos().getPosValue();

      index.put(key, value);
    }

    return index;
  }
}

/**
 * Uses the token's covered text as its base form.
 *
 * @param token the token to get the base form for
 * @return the text covered by the token
 */
@Override
public String getTokenBaseForm(Token token) {
  String coveredText = token.getCoveredText();
  return coveredText;
}

// Excerpt: create a token over [begin, end) in jcas1 and add it to the indexes.
Token tokenAnnot = new Token(jcas1);  
tokenAnnot.setBegin(begin);  
tokenAnnot.setEnd(end); 
tokenAnnot.addToIndexes(); 
// Create a lemma starting at the same offset and link it to the token.
// NOTE(review): within this excerpt the lemma's end offset and value are not set and it
// is not added to the indexes — the excerpt may be truncated; confirm against the full source.
Lemma lemmaAnnot = new Lemma(jcas1); 
lemmaAnnot.setBegin(begin); 
tokenAnnot.setLemma(lemmaAnnot);

Javadoc

Token is one of the two types commonly produced by a segmenter (the other being Sentence). A Token usually represents a word, although it may be used to represent multiple tightly connected words (e.g. "New York") or parts of a word (e.g. the possessive "'s"). One may choose to split compound words into multiple tokens, e.g. ("CamelCase" -> "Camel", "Case"; "Zauberstab" -> "Zauber", "stab"). Most processing components operate on Tokens, usually within the limits of the surrounding Sentence. E.g. a part-of-speech tagger analyses each Token in a Sentence and assigns a part-of-speech to each Token.

Most used methods

getBegin
getEnd
getCoveredText
setPos
setter for pos - sets
getPos
getter for pos - gets
<init>
addToIndexes
setLemma
setter for lemma - sets
getLemma
getter for lemma - gets
getText
setMorph
setter for morph - sets The morphological feature associated with this token.
setEnd

Popular in Java

Reactive rest calls using spring rest template
getResourceAsStream (ClassLoader)
getSharedPreferences (Context)
orElseThrow (Optional)
Return the contained value, if present, otherwise throw an exception to be created by the provided s
RandomAccessFile (java.io)
Allows reading from and writing to a file in a random-access manner. This is different from the uni-
Proxy (java.net)
This class represents proxy server settings. A created instance of Proxy stores a type and an addres
Stream (java.util.stream)
A sequence of elements supporting sequential and parallel aggregate operations. The following exampl
Servlet (javax.servlet)
Defines methods that all servlets must implement. A servlet is a small Java program that runs within
Base64 (org.apache.commons.codec.binary)
Provides Base64 encoding and decoding as defined by RFC 2045. This class implements section 6.8. Base
Project (org.apache.tools.ant)
Central representation of an Ant project. This class defines an Ant project with all of its targets,
Best IntelliJ plugins

How to use Token in de.tudarmstadt.ukp.dkpro.core.api.segmentation.type

Best Java code snippets using de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token (Showing top 20 results out of 774)

How to use
Token
in
de.tudarmstadt.ukp.dkpro.core.api.segmentation.type