opennlp.tools.postag.POSTaggerME.tag java code examples

// Run the tagger over the tokens and store its result in partsOfSpeech.
partsOfSpeech = tagger.tag(tokens);

// Run the tagger over the tokens and store its result in partsOfSpeech.
partsOfSpeech = tagger.tag(tokens);

/**
 * Tags the given sentence by delegating to the two-argument {@code tag}
 * overload, passing {@code null} as the second argument.
 *
 * @param sentence the sentence to tag, as an array of strings
 * @return whatever the two-argument overload returns for this sentence
 */
public String[] tag(String[] sentence) {
 return this.tag(sentence, null);
}

// Copy the token list into a String[] for the tagger, then wrap the returned
// tag array as a List (Arrays.asList yields a fixed-size view of that array).
final List<String> posTags = Arrays.asList(this.posTagger.tag(
  sentenceTokenList.toArray(new String[sentenceTokenList.size()])));

/**
 * Asks the tagger for several alternative taggings of one sentence.
 * The request passed to the tagger is capped at 13 taggings.
 *
 * @param tokens the tokens of the sentence
 * @return the two-dimensional tag array exactly as the tagger returned it
 */
public final String[][] getAllPosTags(final String[] tokens) {
 return this.posTagger.tag(13, tokens);
}

/**
 * Runs the part-of-speech tagger over one tokenized sentence.
 *
 * @param tokens the tokens of the sentence
 * @return the tag array produced by the tagger for those tokens
 */
public String[] posTag(String[] tokens) {
 return posTagger.tag(tokens);
}

/**
 * Tags the given sentence by delegating to the two-argument {@code tag}
 * overload, passing {@code null} as the second argument.
 *
 * @param sentence the sentence to tag, as an array of strings
 * @return whatever the two-argument overload returns for this sentence
 */
public String[] tag(String[] sentence) {
 return this.tag(sentence, null);
}

/**
 * Tags the given sentence by delegating to the two-argument {@code tag}
 * overload, passing {@code null} as the second argument.
 *
 * @param sentence the sentence to tag, as an array of strings
 * @return whatever the two-argument overload returns for this sentence
 */
public String[] tag(String[] sentence) {
 return this.tag(sentence, null);
}

// Tag the tokens held in whitespaceTokenizerLine and keep the tagger's result in tags.
String[] tags = tagger.tag(whitespaceTokenizerLine);

  /**
   * Returns the tags for the given words, using the tagger instance
   * obtained from {@code tagger.get()}.
   *
   * @param words the words to tag
   * @return the array returned by that tagger's {@code tag} call
   */
  @Override
  public String[] getTags(String[] words) {
    return tagger.get().tag(words);
  }
}

/**
 * Tags a tokenized sentence and returns the tags as a new, mutable list.
 *
 * @param tokens
 *          the tokens of the sentence
 * @return a fresh list holding the tags in the order the tagger returned them
 */
public final List<String> posAnnotate(final String[] tokens) {
 final String[] tags = this.posTagger.tag(tokens);
 final List<String> result = new ArrayList<String>();
 for (final String tag : tags) {
  result.add(tag);
 }
 return result;
}

/**
 * Interactive check: tags every line read from standard input and prints
 * the tagger's result to standard output until the input ends.
 *
 * @param model the model used to build the tagger (no tag dictionary is supplied)
 * @throws IOException if reading from standard input fails
 */
@Deprecated
public static void test(AbstractModel model) throws IOException {
 final POSTaggerME posTagger = new POSTaggerME(model, (TagDictionary) null);
 final BufferedReader stdin = new BufferedReader(new InputStreamReader(System.in));
 String current;
 while ((current = stdin.readLine()) != null) {
  System.out.println(posTagger.tag(current));
 }
}

/**
 * {@inheritDoc}
 *
 * <p>Each token and its tag are trimmed before being paired into a
 * {@code PartsOfSpeechToken}; the result preserves token order.
 */
@Override
public List<PartsOfSpeechToken> tag(String[] tokenizedSentence) {
 final String[] labels = tagger.tag(tokenizedSentence);
 final List<PartsOfSpeechToken> result = new LinkedList<PartsOfSpeechToken>();
 int position = 0;
 for (final String rawToken : tokenizedSentence) {
  result.add(new PartsOfSpeechToken(rawToken.trim(), labels[position].trim()));
  position++;
 }
 return result;
}

POSModel model = new POSModelLoader().load(new File("en-pos-maxent.bin"));
 PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent");
 POSTaggerME tagger = new POSTaggerME(model);
 BufferedWriter bufferedWriter = new BufferedWriter(new FileWriter("C:\\Users\\dell\\Desktop\\file2.txt"));
 BufferedReader bufferedReader = new BufferedReader(new FileReader("C:\\Users\\dell\\Desktop\\file.txt"));
 String line = "";
 while((line = bufferedReader.readLine()) != null){
   String whitespaceTokenizerLine[] = WhitespaceTokenizer.INSTANCE.tokenize(line);
   String[] tags = tagger.tag(whitespaceTokenizerLine);
   // Do your work with your tags and tokenized words
   bufferedWriter.write(/* the string which is needed to be written to your output */);
   // for adding new-lines in the output file, uncomment the following line:
   //bufferedWriter.newLine();
 }
 //Do not forget to flush() and close() the streams after your job is done:
 bufferedWriter.flush();
 bufferedWriter.close();
 bufferedReader.close();

/**
 * {@inheritDoc}
 *
 * <p>Every sentence is tokenized with the supplied tokenizer and tagged;
 * the trimmed token/tag pairs of all sentences are appended to one list.
 */
@Override
public List<PartsOfSpeechToken> tag(String[] sentences, Tokenizer tokenizer) {
 final List<PartsOfSpeechToken> collected = new LinkedList<PartsOfSpeechToken>();
 for (int s = 0; s < sentences.length; s++) {
  final String[] words = tokenizer.tokenize(sentences[s]);
  final String[] labels = tagger.tag(words);
  int position = 0;
  while (position < words.length) {
   collected.add(new PartsOfSpeechToken(words[position].trim(), labels[position].trim()));
   position++;
  }
 }
 return collected;
}

/**
 * Collects the words of a sentence that are not keys of the stop cache and
 * whose tag is a key of the relevance cache.
 *
 * @param sentence the words of the sentence
 * @return the selected words paired with their tags, in sentence order
 */
public static ArrayList<WordPOS> getAllRelevantWords(String[] sentence) {
 final ArrayList<WordPOS> selected = new ArrayList<WordPOS>();
 final String[] labels = WSDHelper.getTagger().tag(sentence);
 for (int idx = 0; idx < sentence.length; idx++) {
  final String word = sentence[idx];
  // Words found in the stop cache are skipped before their tag is looked at.
  if (WSDHelper.getStopCache().containsKey(word)) {
   continue;
  }
  final String label = labels[idx];
  if (WSDHelper.getRelvCache().containsKey(label)) {
   selected.add(new WordPOS(word, label));
  }
 }
 return selected;
}

/**
 * Tags the given words and wraps each word with its converted tag.
 *
 * @param words the words to tag
 * @return one {@code TaggedWord} per input word, in input order
 */
@Override
public List<TaggedWord> getTaggedList(String[] words) {
  final String[] labels = tagger.get().tag(words);
  final ArrayList<TaggedWord> result = new ArrayList<>();
  int position = 0;
  for (final String word : words) {
    // TODO: check - there is no method to get words begin and end positions
    final TaggedWord entry = new TaggedWord();
    entry.setWord(word);
    entry.setTag(PartOfSpeech.fromPennTreebank(labels[position]));
    result.add(entry);
    position++;
  }
  return result;
}

/**
 * {@inheritDoc}
 *
 * <p>The input is split into sentences with the supplied detector; each
 * sentence is tokenized and tagged, and the trimmed token/tag pairs of all
 * sentences are appended to one list.
 */
@Override
public List<PartsOfSpeechToken> tag(String input, SentenceDetector sentenceDetector, Tokenizer tokenizer) {
 final String[] detected = sentenceDetector.sentDetect(input);
 final List<PartsOfSpeechToken> collected = new LinkedList<PartsOfSpeechToken>();
 for (int s = 0; s < detected.length; s++) {
  final String[] words = tokenizer.tokenize(detected[s]);
  final String[] labels = tagger.tag(words);
  int position = 0;
  while (position < words.length) {
   collected.add(new PartsOfSpeechToken(words[position].trim(), labels[position].trim()));
   position++;
  }
 }
 return collected;
}

/**
 * Tokenizes a sentence, tags each token, and returns every token together
 * with its tag and the start/end of its span in the sentence.
 *
 * @param sentence the raw sentence text
 * @return one {@code PosToken} per token, in token order
 */
@Override
public List<PosToken> tagPOS(String sentence) {
 // Tokenize only once: the token strings are derived from the spans, so the
 // text and the offsets are guaranteed to come from the same tokenization.
 // NOTE(review): assumes tokenizer.tokenize(s) equals the covered text of
 // tokenizePos(s), as is the case for the standard OpenNLP tokenizers.
 Span[] spans = tokenizer.tokenizePos(sentence);
 String[] tokens = Span.spansToStrings(spans, sentence);
 String[] tags = tagger.tag(tokens);
 List<PosToken> poss = new ArrayList<>(tokens.length);
 for (int i = 0; i < tokens.length; i++) {
  poss.add(new PosToken(tokens[i], tags[i], spans[i].getStart(), spans[i].getEnd()));
 }
 return poss;
}

/**
 * Tags the tokens of a sentence annotation and stores a part-of-speech
 * value on each token under {@code Types.PART_OF_SPEECH}.
 *
 * @param sentence the sentence annotation whose tokens are tagged in place
 */
@Override
public void annotate(Annotation sentence) {
 // A new tagger is built on every call from the model for the sentence's language.
 final POSTaggerME languageTagger = new POSTaggerME(loadPOSTagger(sentence.getLanguage()));
 final String[] words = sentence.tokens()
   .stream()
   .map(Object::toString)
   .toArray(String[]::new);
 final String[] labels = languageTagger.tag(words);
 for (int position = 0; position < words.length; position++) {
  sentence.tokenAt(position).put(Types.PART_OF_SPEECH, POS.fromString(labels[position]));
 }
}

Javadoc

Returns at most the specified number of taggings for the specified sentence.

Popular in Java

Running tasks concurrently on multiple threads
setScale (BigDecimal)
getSharedPreferences (Context)
getExternalFilesDir (Context)
Iterator (java.util)
An iterator over a sequence of objects, such as a collection. If a collection has been changed since
JarFile (java.util.jar)
JarFile is used to read jar entries and their associated data from jar files.
DateTimeFormat (org.joda.time.format)
Factory that creates instances of DateTimeFormatter from patterns and styles. Datetime formatting i
Annotation (javassist.bytecode.annotation)
The annotation structure. An instance of this class is returned by getAnnotations() in AnnotationsAttr
ImageIO (javax.imageio)
Option (scala)
Top PhpStorm plugins

How to use the tag method in opennlp.tools.postag.POSTaggerME

Best Java code snippets using opennlp.tools.postag.POSTaggerME.tag (Showing top 20 results out of 315)

How to use
tag
method
in
opennlp.tools.postag.POSTaggerME