// Run the tagger over the tokens and keep its result in partsOfSpeech.
partsOfSpeech = tagger.tag(tokens);
// NOTE(review): this repeats the statement above verbatim, so the tagger is invoked a second time and the first result is overwritten — confirm the duplicate is intentional.
partsOfSpeech = tagger.tag(tokens);
/**
 * Tags a sentence by delegating to the two-argument {@code tag} overload,
 * passing {@code null} as the second argument.
 *
 * @param sentence the tokens of the sentence
 * @return whatever the two-argument overload returns for {@code sentence}
 */
public String[] tag(String[] sentence) {
  final String[] tags = this.tag(sentence, null);
  return tags;
}
// Copy the sentence tokens into an array, tag them, and wrap the resulting tags in a list.
final String[] sentenceTokenArray =
    sentenceTokenList.toArray(new String[sentenceTokenList.size()]);
final List<String> posTags = Arrays.asList(this.posTagger.tag(sentenceTokenArray));
/**
 * Returns the multiple taggings the POS tagger produces for a sentence.
 *
 * <p>Delegates to {@code posTagger.tag(13, tokens)} and hands back its result unchanged.
 * NOTE(review): the literal 13 is presumably the number of taggings requested —
 * confirm against the tagger API.
 *
 * @param tokens the tokens of the sentence
 * @return a two-dimensional array with one row of tags per tagging
 */
public final String[][] getAllPosTags(final String[] tokens) {
  return this.posTagger.tag(13, tokens);
}
/**
 * Assigns part-of-speech tags to a tokenized sentence.
 *
 * @param tokens the tokens of the sentence
 * @return the tags the underlying tagger returns for those tokens
 */
public String[] posTag(String[] tokens) {
  return posTagger.tag(tokens);
}
/**
 * Tags a sentence by delegating to the two-argument {@code tag} overload,
 * passing {@code null} as the second argument.
 *
 * @param sentence the tokens of the sentence
 * @return whatever the two-argument overload returns for {@code sentence}
 */
public String[] tag(String[] sentence) {
  final String[] tags = this.tag(sentence, null);
  return tags;
}
/**
 * Tags a sentence by delegating to the two-argument {@code tag} overload,
 * passing {@code null} as the second argument.
 *
 * @param sentence the tokens of the sentence
 * @return whatever the two-argument overload returns for {@code sentence}
 */
public String[] tag(String[] sentence) {
  final String[] tags = this.tag(sentence, null);
  return tags;
}
// Tag the tokens held in whitespaceTokenizerLine; the tagger's result is stored in tags.
String[] tags = tagger.tag(whitespaceTokenizerLine);
/**
 * Returns the tags for the given words, as produced by the tagger
 * obtained from {@code tagger.get()}.
 *
 * @param words the words to tag
 * @return the tags returned by that tagger
 */
@Override
public String[] getTags(String[] words) {
  final String[] tags = tagger.get().tag(words);
  return tags;
}
} // closes the enclosing type, whose opening lies outside this excerpt
/**
 * Tags a tokenized sentence and returns the tags as a list.
 *
 * @param tokens
 *          the tokens of the sentence
 * @return a new {@code ArrayList} holding the tags in the order the tagger
 *         returned them
 */
public final List<String> posAnnotate(final String[] tokens) {
  final List<String> tagView = Arrays.asList(this.posTagger.tag(tokens));
  return new ArrayList<String>(tagView);
}
/**
 * Reads lines from standard input until end of input and prints the
 * tagger's output for each line to standard output.
 *
 * @param model the model used to build the {@code POSTaggerME}; a
 *        {@code null} tag dictionary is passed alongside it
 * @throws IOException if reading from standard input fails
 */
@Deprecated
public static void test(AbstractModel model) throws IOException {
  POSTaggerME posTagger = new POSTaggerME(model, (TagDictionary) null);
  // The reader wraps System.in and is not closed by this method.
  BufferedReader stdin = new BufferedReader(new InputStreamReader(System.in));
  String line;
  while ((line = stdin.readLine()) != null) {
    System.out.println(posTagger.tag(line));
  }
}
/**
 * {@inheritDoc}
 *
 * <p>Tags the given tokens and pairs each trimmed token with its trimmed tag,
 * preserving token order.
 */
@Override
public List<PartsOfSpeechToken> tag(String[] tokenizedSentence) {
  final String[] posTags = tagger.tag(tokenizedSentence);
  final List<PartsOfSpeechToken> result = new LinkedList<PartsOfSpeechToken>();
  int index = 0;
  for (String rawToken : tokenizedSentence) {
    final String token = rawToken.trim();
    final String posTag = posTags[index].trim();
    result.add(new PartsOfSpeechToken(token, posTag));
    index++;
  }
  return result;
}
POSModel model = new POSModelLoader().load(new File("en-pos-maxent.bin")); PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent"); POSTaggerME tagger = new POSTaggerME(model); BufferedWriter bufferedWriter = new BufferedWriter(new FileWriter("C:\\Users\\dell\\Desktop\\file2.txt")); BufferedReader bufferedReader = new BufferedReader(new FileReader("C:\\Users\\dell\\Desktop\\file.txt")); String line = ""; while((line = bufferedReader.readLine()) != null){ String whitespaceTokenizerLine[] = WhitespaceTokenizer.INSTANCE.tokenize(line); String[] tags = tagger.tag(whitespaceTokenizerLine); // Do your work with your tags and tokenized words bufferedWriter.write(/* the string which is needed to be written to your output */); // for adding new-lines in the output file, uncomment the following line: //bufferedWriter.newLine(); } //Do not forget to flush() and close() the streams after your job is done: bufferedWriter.flush(); bufferedWriter.close(); bufferedReader.close();
/**
 * {@inheritDoc}
 *
 * <p>Tokenizes each sentence with the supplied tokenizer, tags the tokens, and
 * appends one trimmed token/tag pair per token to a single result list.
 */
@Override
public List<PartsOfSpeechToken> tag(String[] sentences, Tokenizer tokenizer) {
  final List<PartsOfSpeechToken> result = new LinkedList<PartsOfSpeechToken>();
  for (String sentence : sentences) {
    final String[] sentenceTokens = tokenizer.tokenize(sentence);
    final String[] posTags = tagger.tag(sentenceTokens);
    int index = 0;
    for (String rawToken : sentenceTokens) {
      final String token = rawToken.trim();
      final String posTag = posTags[index].trim();
      result.add(new PartsOfSpeechToken(token, posTag));
      index++;
    }
  }
  return result;
}
/**
 * Collects the words of a sentence that are not keys of the stop cache and
 * whose tag is a key of the relevance cache.
 *
 * @param sentence the tokens of the sentence
 * @return the selected words paired with their tags, in sentence order
 */
public static ArrayList<WordPOS> getAllRelevantWords(String[] sentence) {
  final String[] posTags = WSDHelper.getTagger().tag(sentence);
  final ArrayList<WordPOS> selected = new ArrayList<WordPOS>();
  for (int idx = 0; idx < sentence.length; idx++) {
    // Skip stop words first; the relevance cache is only consulted for the rest.
    if (WSDHelper.getStopCache().containsKey(sentence[idx])) {
      continue;
    }
    if (!WSDHelper.getRelvCache().containsKey(posTags[idx])) {
      continue;
    }
    selected.add(new WordPOS(sentence[idx], posTags[idx]));
  }
  return selected;
}
@Override public List<TaggedWord> getTaggedList(String[] words) { final String[] tags = tagger.get().tag(words); final ArrayList<TaggedWord> taggedWords = new ArrayList<>(); for (int i = 0; i < words.length; i++) { // TODO: check - there is no method to get words begin and end positions final TaggedWord taggedWord = new TaggedWord(); taggedWord.setWord(words[i]); taggedWord.setTag(PartOfSpeech.fromPennTreebank(tags[i])); taggedWords.add(taggedWord); } return taggedWords; }
/**
 * {@inheritDoc}
 *
 * <p>Splits the input into sentences with the supplied detector, tokenizes and
 * tags each sentence, and appends one trimmed token/tag pair per token to a
 * single result list.
 */
@Override
public List<PartsOfSpeechToken> tag(String input, SentenceDetector sentenceDetector, Tokenizer tokenizer) {
  final String[] detectedSentences = sentenceDetector.sentDetect(input);
  final List<PartsOfSpeechToken> result = new LinkedList<PartsOfSpeechToken>();
  for (String sentence : detectedSentences) {
    final String[] sentenceTokens = tokenizer.tokenize(sentence);
    final String[] posTags = tagger.tag(sentenceTokens);
    int index = 0;
    while (index < sentenceTokens.length) {
      final String token = sentenceTokens[index].trim();
      final String posTag = posTags[index].trim();
      result.add(new PartsOfSpeechToken(token, posTag));
      index++;
    }
  }
  return result;
}
/**
 * Tokenizes a sentence, tags the tokens, and returns one {@code PosToken} per
 * token holding the token text, its tag, and the start and end of its span.
 *
 * @param sentence the sentence to tokenize and tag
 * @return the tagged tokens, in token order
 */
@Override
public List<PosToken> tagPOS(String sentence) {
  final String[] words = tokenizer.tokenize(sentence);
  // The sentence is tokenized a second time to obtain the token spans.
  final Span[] offsets = tokenizer.tokenizePos(sentence);
  final String[] posTags = tagger.tag(words);

  final List<PosToken> result = new ArrayList<>();
  int idx = 0;
  while (idx < words.length) {
    result.add(
        new PosToken(words[idx], posTags[idx], offsets[idx].getStart(), offsets[idx].getEnd()));
    idx++;
  }
  return result;
}
/**
 * Tags every token of the sentence and stores each tag on the corresponding
 * token annotation under {@code Types.PART_OF_SPEECH}.
 *
 * <p>A new {@code POSTaggerME} is constructed on every call from
 * {@code loadPOSTagger(sentence.getLanguage())}.
 *
 * @param sentence the sentence annotation whose tokens are tagged
 */
@Override
public void annotate(Annotation sentence) {
  POSTaggerME posTagger = new POSTaggerME(loadPOSTagger(sentence.getLanguage()));
  // The tagger works on plain strings, so render every token via toString().
  String[] words = sentence.tokens().stream().map(Object::toString).toArray(String[]::new);
  String[] posTags = posTagger.tag(words);
  int position = 0;
  while (position < words.length) {
    sentence.tokenAt(position).put(Types.PART_OF_SPEECH, POS.fromString(posTags[position]));
    position++;
  }
}