de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS.getPosValue java code examples

 /**
  * Checks whether the CAS looks like high-quality text: it must contain at least
  * {@code minTokQty} tokens, at least one POS tag starting with "NN" and at least one
  * POS tag starting with "VB".
  * <p>
  * This method only reads the {@link POS} and {@link Token} annotations already present
  * in the CAS; it does not add annotations itself. POS annotations that carry no tag
  * value are ignored instead of causing a {@link NullPointerException}.
  * 
  * @param jcas            An input CAS whose POS and Token annotations are inspected.
  * @param minTokQty       The minimum number of tokens present to be considered good.
  * @return                true if the CAS contains a high-quality text. 
  * @throws AnalysisEngineProcessException declared in the signature for callers; no
  *                        statement in this method body throws it explicitly.
  */
 public static boolean checkCAS(JCas jcas, int minTokQty) throws AnalysisEngineProcessException {       
  boolean hasNoun = false, hasVerb = false;
  
  for (POS p: JCasUtil.select(jcas, POS.class)) {
   String tag = p.getPosValue();
   if (tag == null) {
    // A POS annotation may exist without a tag value; it is neither a noun nor a verb
    continue;
   }
   if (tag.startsWith("NN")) {
    hasNoun = true;
   }
   if (tag.startsWith("VB")) {
    hasVerb = true;
   }
   if (hasNoun && hasVerb) {
    // Both conditions are satisfied, the remaining tags cannot change the result
    break;
   }
  }
  
  Collection<Token> toks = JCasUtil.select(jcas, Token.class);
  
  return toks.size() >= minTokQty && hasNoun && hasVerb;    
 }  
}

/**
 * Convenience accessor for the tag of the {@link POS} annotation linked to this token.
 *
 * @return the value of {@code getPos().getPosValue()}, or {@code null} when
 *         {@code getPos()} returns {@code null}.
 */
public String getPosValue()
{
  POS linked = getPos();
  if (linked == null) {
    return null;
  }
  return linked.getPosValue();
}

/**
 * Derives the outcome from the first {@link POS} annotation covered by the target.
 *
 * @param jcas   the CAS to read POS annotations from
 * @param target the annotation whose span is searched for POS annotations
 * @return the simple class name of the first covered POS annotation when
 *         {@code useCoarseGrained} is set, otherwise its POS value
 */
public String getTextClassificationOutcome(JCas jcas, TextClassificationTarget target)
{
  // get(0) fails on an empty list, exactly as it would in either branch below
  POS first = JCasUtil.selectCovered(jcas, POS.class, target).get(0);
  return useCoarseGrained ? first.getClass().getSimpleName() : first.getPosValue();
}

/**
 * Uses the POS value of the first {@link POS} annotation covered by the target as outcome.
 *
 * @param jcas    the CAS to read POS annotations from
 * @param aTarget the annotation whose span is searched for POS annotations
 * @return the POS value of the first covered POS annotation
 */
public String getTextClassificationOutcome(JCas jcas, TextClassificationTarget aTarget)
{
  POS first = JCasUtil.selectCovered(jcas, POS.class, aTarget).get(0);
  return first.getPosValue();
}

/**
 * Collects the distinct n-grams of POS values found in the given list. The n-gram
 * length is the {@code n} defined outside this method.
 *
 * @param pos the POS annotations, in the order the n-grams should be read
 * @return the set of POS-value n-grams, each as a list of {@code n} tag strings
 */
private Set<List<String>> getPosNGrams(List<POS> pos)
{
  Set<List<String>> result = new HashSet<List<String>>();

  // Last index at which a complete window of n elements still fits
  int lastStart = pos.size() - n;
  for (int start = 0; start <= lastStart; start++) {
    List<String> window = new ArrayList<String>();
    for (int idx = start; idx < start + n; idx++) {
      window.add(pos.get(idx).getPosValue());
    }
    result.add(window);
  }

  return result;
}

/**
 * Returns the tag of the {@link POS} annotation linked to this token, if any.
 *
 * @return {@code getPos().getPosValue()}, or {@code null} when {@code getPos()} yields
 *         {@code null}.
 */
public String getPosValue()
{
  POS attached = getPos();
  if (attached != null) {
    return attached.getPosValue();
  }
  return null;
}

/**
 * Uses the POS value of the first {@link POS} annotation covered by the unit as outcome,
 * with every space replaced by an underscore.
 *
 * @param jcas the CAS to read POS annotations from
 * @param unit the annotation whose span is searched for POS annotations
 * @return the underscore-normalised POS value of the first covered POS annotation
 */
public String getTextClassificationOutcome(JCas jcas, TextClassificationTarget unit)
{
  POS first = JCasUtil.selectCovered(jcas, POS.class, unit).get(0);
  String rawTag = first.getPosValue();
  return rawTag.replaceAll(" ", "_");
}

/**
 * Slides a window of {@code n} elements (with {@code n} defined outside this method)
 * over the POS list and gathers the POS values of each window as one n-gram.
 *
 * @param pos the POS annotations to read tags from
 * @return the distinct n-grams of POS values
 */
private Set<List<String>> getPosNGrams(List<POS> pos)
{
  Set<List<String>> collected = new HashSet<List<String>>();
  int windowCount = pos.size() - (n - 1);

  int first = 0;
  while (first < windowCount) {
    // Assemble the n-gram that begins at position 'first'
    List<String> gram = new ArrayList<String>();
    int step = 0;
    while (step < n) {
      gram.add(pos.get(first + step).getPosValue());
      step++;
    }
    collected.add(gram);
    first++;
  }

  return collected;
}

/**
 * Produces every distinct run of {@code n} consecutive POS values from the given list,
 * where {@code n} is defined outside this method.
 *
 * @param pos the POS annotations, in reading order
 * @return a set holding one list of tag strings per distinct n-gram
 */
private Set<List<String>> getPosNGrams(List<POS> pos)
{
  Set<List<String>> grams = new HashSet<List<String>>();

  for (int begin = 0, end = n; begin < pos.size() - (n - 1); begin++, end++) {
    // [begin, end) is the current window of n elements
    List<String> tags = new ArrayList<String>();
    for (int at = begin; at < end; at++) {
      tags.add(pos.get(at).getPosValue());
    }
    grams.add(tags);
  }

  return grams;
}

/**
 * Counts POS n-grams sentence by sentence, so that no n-gram spans a sentence boundary.
 *
 * @param jcas         the CAS holding {@link Sentence} and {@link POS} annotations
 * @param minN         lower n-gram size handed to {@code NGramStringListIterable}
 * @param maxN         upper n-gram size handed to {@code NGramStringListIterable}
 * @param useCanonical if true, the simple class name of each POS annotation is used
 *                     instead of its POS value
 * @return a frequency distribution keyed by the n-gram elements joined with
 *         {@code NGRAM_GLUE}
 */
public static FrequencyDistribution<String> getDocumentPosNgrams(JCas jcas, int minN, int maxN, boolean useCanonical)
{
  FrequencyDistribution<String> counts = new FrequencyDistribution<String>();

  for (Sentence sentence : select(jcas, Sentence.class)) {
    // Tags of the current sentence, in annotation order
    List<String> tags = new ArrayList<String>();
    for (POS p : JCasUtil.selectCovered(jcas, POS.class, sentence)) {
      String label = useCanonical ? p.getClass().getSimpleName() : p.getPosValue();
      tags.add(label);
    }

    String[] tagArray = tags.toArray(new String[tags.size()]);
    for (List<String> ngram : new NGramStringListIterable(tagArray, minN, maxN)) {
      String key = StringUtils.join(ngram, NGRAM_GLUE);
      counts.inc(key);
    }
  }

  return counts;
}

/**
 * Counts the POS n-grams formed by all {@link POS} annotations covered by the focus
 * annotation, treated as one continuous sequence.
 *
 * @param jcas         the CAS holding the POS annotations
 * @param focus        the annotation whose span is searched for POS annotations
 * @param useCanonical if true, the simple class name of each POS annotation is used
 *                     instead of its POS value
 * @param minN         lower n-gram size handed to {@code NGramStringListIterable}
 * @param maxN         upper n-gram size handed to {@code NGramStringListIterable}
 * @return a frequency distribution keyed by the n-gram elements joined with
 *         {@code NGRAM_GLUE}
 */
private static FrequencyDistribution<String> documentBasedDistribution(JCas jcas,
    Annotation focus, boolean useCanonical, int minN, int maxN)
{
  List<String> tags = new ArrayList<String>();
  for (POS p : selectCovered(jcas, POS.class, focus)) {
    String label = useCanonical ? p.getClass().getSimpleName() : p.getPosValue();
    tags.add(label);
  }

  FrequencyDistribution<String> counts = new FrequencyDistribution<String>();
  String[] tagArray = tags.toArray(new String[tags.size()]);
  for (List<String> ngram : new NGramStringListIterable(tagArray, minN, maxN)) {
    String key = StringUtils.join(ngram, NGRAM_GLUE);
    counts.inc(key);
  }
  return counts;
}

/**
 * Counts how often each lemma/POS combination occurs among the tokens of the CAS.
 * The key has the form {@code <lemma> ### <posValue>}, where every '#' inside the
 * lemma is escaped as "\#" so it cannot be confused with the separator.
 *
 * @param text the CAS whose {@link Token} annotations are counted
 * @return a map from lemma/POS key to its number of occurrences
 */
protected Map<String, Integer> countTokenPoses(JCas text) {
  Map<String, Integer> counts = new HashMap<String, Integer>();
  for (Iterator<Annotation> it = text.getAnnotationIndex(Token.type).iterator(); it.hasNext();) {
    Token token = (Token) it.next();
    String escapedLemma = token.getLemma().getValue().replace("#", "\\#");
    String key = escapedLemma + " ### " + token.getPos().getPosValue();
    Integer seen = counts.get(key);
    counts.put(key, seen == null ? 1 : seen + 1);
  }
  return counts;
}

/**
 * Gathers the lower-cased lemma and the POS value of every {@link Token} in the CAS and
 * delegates to the list-based {@code getSubstitutions(tokens, postags)} overload.
 * <p>
 * Tokens that lack a lemma, a lemma value or a POS annotation are skipped entirely and
 * reported on {@code System.err}. Both values are checked before either list is touched,
 * so the two lists always stay index-aligned.
 *
 * @param jcas the CAS whose tokens are read
 * @return whatever the list-based overload returns for the collected lemmas and tags
 */
public List<String> getSubstitutions(JCas jcas)
{
  List<String> tokens = new ArrayList<String>();
  List<String> postags = new ArrayList<String>();
  
  for (Token t : JCasUtil.select(jcas, Token.class))
  {
    if (t.getLemma() == null || t.getLemma().getValue() == null) {
      System.err.println("Couldn't read lemma value for token \"" + t.getCoveredText() + "\"");
      continue;
    }
    if (t.getPos() == null) {
      // Skip the whole token: adding only the lemma would misalign the two lists
      System.err.println("Couldn't read POS value for token \"" + t.getCoveredText() + "\"");
      continue;
    }
    
    tokens.add(t.getLemma().getValue().toLowerCase());
    postags.add(t.getPos().getPosValue());
  }
  
  return getSubstitutions(tokens, postags);
}

/**
 * Gathers the lower-cased lemma and the POS value of every {@link Token} covered by the
 * given annotation and delegates to the list-based
 * {@code getSubstitutions(tokens, postags)} overload.
 * <p>
 * Tokens that lack a lemma, a lemma value or a POS annotation are skipped entirely and
 * reported on {@code System.err}. Both values are checked before either list is touched,
 * so the two lists always stay index-aligned.
 *
 * @param jcas               the CAS whose tokens are read
 * @param coveringAnnotation only tokens covered by this annotation are considered
 * @return whatever the list-based overload returns for the collected lemmas and tags
 */
public List<String> getSubstitutions(JCas jcas, Annotation coveringAnnotation)
{
  List<String> tokens = new ArrayList<String>();
  List<String> postags = new ArrayList<String>();
  
  for (Token t : JCasUtil.selectCovered(jcas, Token.class, coveringAnnotation))
  {
    if (t.getLemma() == null || t.getLemma().getValue() == null) {
      System.err.println("Couldn't read lemma value for token \"" + t.getCoveredText() + "\"");
      continue;
    }
    if (t.getPos() == null) {
      // Skip the whole token: adding only the lemma would misalign the two lists
      System.err.println("Couldn't read POS value for token \"" + t.getCoveredText() + "\"");
      continue;
    }
    
    tokens.add(t.getLemma().getValue().toLowerCase());
    postags.add(t.getPos().getPosValue());
  }
  
  return getSubstitutions(tokens, postags);
}

/**
 * Tests whether a feature structure carries the given POS value. A {@link POS} is
 * compared directly; a {@link Token} is compared through its linked POS annotation.
 * Any other feature structure, a token without POS annotation, or a POS annotation
 * without a value yields {@code false}.
 *
 * @param fs       the feature structure to inspect
 * @param posValue the POS value to compare against
 * @return true if the POS value found for {@code fs} equals {@code posValue}
 */
@SuppressWarnings("unused")
private boolean hasPos(FeatureStructure fs, String posValue)
{
  String actual = null;
  if (fs instanceof POS) {
    actual = ((POS) fs).getPosValue();
  }
  else if (fs instanceof Token) {
    Token token = (Token) fs;
    // A token is not guaranteed to have a POS annotation attached
    if (token.getPos() != null) {
      actual = token.getPos().getPosValue();
    }
  }
  return actual != null && actual.equals(posValue);
}

/**
 * Reports every {@link Token} that has no POS annotation and, after those, every token
 * whose POS annotation has no POS value. Each finding is appended to
 * {@code aMessages} as an ERROR message.
 *
 * @param aJCas     the CAS whose tokens are inspected
 * @param aMessages receives one message per offending token
 * @return true if {@code aMessages} contains at least one ERROR-level message after the
 *         check, including messages that were already in the list
 */
@Override
public boolean check(JCas aJCas, List<Message> aMessages)
{
  // First pass: tokens without any POS annotation
  for (Token t : select(aJCas, Token.class)) {
    if (t.getPos() == null) {
      String text = String.format("Token has no POS: %s [%d..%d]",
          t.getType().getName(), t.getBegin(), t.getEnd());
      aMessages.add(new Message(this, ERROR, text));
    }
  }

  // Second pass: tokens whose POS annotation lacks a value
  for (Token t : select(aJCas, Token.class)) {
    if (t.getPos() != null && t.getPos().getPosValue() == null) {
      String text = String.format("Token has no POS value: %s [%d..%d]",
          t.getType().getName(), t.getBegin(), t.getEnd());
      aMessages.add(new Message(this, ERROR, text));
    }
  }

  for (Message m : aMessages) {
    if (m.level == ERROR) {
      return true;
    }
  }
  return false;
}

  /**
   * Turns the next sentence of the {@code sentences} iterator into a {@link POSSample}
   * made of the token texts and their POS values.
   *
   * @param aJCas the CAS passed in by the caller; not read by this method body
   * @return a sample with one word and one tag per token covered by the sentence
   * @throws IllegalStateException if a covered token has no POS annotation
   */
  @Override
  public POSSample produce(JCas aJCas)
  {
    Sentence current = sentences.next();
    
    // Signal completion as soon as the iterator is exhausted, before building the sample
    if (!sentences.hasNext()) {
      documentComplete();
    }
    
    List<String> tokenTexts = new ArrayList<>();
    List<String> posTags = new ArrayList<>();
    
    for (Token tok : selectCovered(Token.class, current)) {
      tokenTexts.add(tok.getText());
      if (tok.getPos() != null) {
        posTags.add(tok.getPos().getPosValue());
        continue;
      }
      throw new IllegalStateException("Token [" + tok.getText() + "] has no POS");
    }
    
    return new POSSample(tokenTexts, posTags);
  }
}

  /**
   * Indexes every {@link Dependency} of the CAS by its dependent token.
   * <p>
   * Key: {@code <begin> ### <lemma> ### <posValue>} of the dependent.
   * Value: {@code <dependencyType> ## <begin> ### <lemma> ### <posValue>} of the
   * governor. Every '#' inside a lemma is escaped as "\#".
   *
   * @param text the CAS whose dependencies are read
   * @return the map from dependent description to dependency type plus governor
   *         description
   */
  protected Map<String, String> indexLemmaDepTree(JCas text) {
    Map<String, String> index = new HashMap<String, String>();

    for (Dependency dep : JCasUtil.select(text, Dependency.class)) {
      Token dependent = dep.getDependent();
      Token governor = dep.getGovernor();

      String dependentKey = dependent.getBegin() + " ### "
          + dependent.getLemma().getValue().replace("#", "\\#") + " ### "
          + dependent.getPos().getPosValue();
      String governorValue = dep.getDependencyType()
          + " ## " + governor.getBegin() + " ### "
          + governor.getLemma().getValue().replace("#", "\\#")
          + " ### " + governor.getPos().getPosValue();

      index.put(dependentKey, governorValue);
    }

    return index;
  }
}

/**
 * Builds a {@link NodeInfo} from a token: covered text, lemma value, serial number,
 * an optional named entity and the POS value wrapped in a {@code PennPartOfSpeech}.
 *
 * @param jcas      the CAS the token belongs to; not read by this method body
 * @param tokenAnno the token to convert
 * @param serial    the serial number stored in the node info
 * @return the node info describing the token
 * @throws CasTreeConverterException if more than one NamedEntity annotation is covered
 *         by the token
 * @throws UnsupportedPosTagStringException declared in the signature; presumably raised
 *         by the {@code PennPartOfSpeech} constructor - confirm against that class
 */
private static NodeInfo buildNodeInfo(JCas jcas, Token tokenAnno, int serial) throws CasTreeConverterException, UnsupportedPosTagStringException {
  String surface = tokenAnno.getCoveredText();
  String lemmaValue = tokenAnno.getLemma().getValue();
  String posTag = tokenAnno.getPos().getPosValue();
  
  // The NamedEntity enum values are expected to carry the same names as the ones
  // specified in the DKPro mapping (e.g. PERSON, ORGANIZATION), so valueOf can be used.
  List<NamedEntity> covered = JCasUtil.selectCovered(NamedEntity.class, tokenAnno);
  if (covered.size() > 1) {
    throw new CasTreeConverterException(String.format("Got %d NamedEntity annotations for token %s", covered.size(), tokenAnno));
  }
  
  // No covered named entity leaves this null
  eu.excitementproject.eop.common.representation.parse.representation.basic.NamedEntity entity = null;
  if (covered.size() == 1) {
    entity = eu.excitementproject.eop.common.representation.parse.representation.basic.NamedEntity.valueOf(covered.get(0).getValue());
  }
  
  DefaultSyntacticInfo syntacticInfo = new DefaultSyntacticInfo(new PennPartOfSpeech(posTag));
  return new DefaultNodeInfo(surface, lemmaValue, serial, entity, syntacticInfo);
}

/**
 * Converts a {@link Token} into a {@link CoreLabel}, copying text, offsets, lemma and
 * POS tag.
 *
 * @param aToken the token to convert
 * @return a new label; its lemma falls back to the token text when the token has no
 *         lemma, and its tag is left unset when the token has no POS annotation
 */
public static CoreLabel tokenToWord(Token aToken)
{
  CoreLabel label = new CoreLabel();
  
  label.setOriginalText(aToken.getCoveredText());
  label.setWord(aToken.getText());
  label.setBeginPosition(aToken.getBegin());
  label.setEndPosition(aToken.getEnd());
  
  // Without a lemma annotation, the token text doubles as the lemma
  label.setLemma(aToken.getLemma() != null ? aToken.getLemma().getValue() : aToken.getText());
  
  if (aToken.getPos() == null) {
    return label;
  }
  label.setTag(aToken.getPos().getPosValue());
  return label;
}

Javadoc

getter for PosValue - gets Fine-grained POS tag. This is the tag as produced by a POS tagger or obtained from a reader.

Popular methods of POS

Popular in Java

Reading from database using SQL prepared statement
setScale (BigDecimal)
onRequestPermissionsResult (Fragment)
getSharedPreferences (Context)
IOException (java.io)
Signals a general, I/O-related error. Error details may be specified when calling the constructor, a
BigDecimal (java.math)
An immutable arbitrary-precision signed decimal. A value is represented by an arbitrary-precision "un
MessageFormat (java.text)
Produces concatenated messages in language-neutral way. New code should probably use java.util.Forma
StringUtils (org.apache.commons.lang)
Operations on java.lang.String that are null safe. * IsEmpty/IsBlank - checks if a String contains
JPanel (javax.swing)
JTable (javax.swing)
Best IntelliJ plugins

How to use the getPosValue method in de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS

Best Java code snippets using de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS.getPosValue (Showing top 20 results out of 369)

How to use the getPosValue method in de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS