/**
 * Supplies a tokenizer factory backed by {@link PTBTokenizer}.
 *
 * @return the factory obtained from {@code PTBTokenizer.coreLabelFactory()}; the tokenizers it
 *     creates produce {@link CoreLabel} tokens
 */
@Override
public TokenizerFactory<CoreLabel> getTokenizerFactory() {
  final TokenizerFactory<CoreLabel> ptbFactory = PTBTokenizer.coreLabelFactory();
  return ptbFactory;
}
/**
 * Creates a PTBTokenizer factory for {@link CoreLabel} tokens using default tokenization, i.e. by
 * delegating to {@code coreLabelFactory(String)} with an empty options string.
 *
 * @return a PTBTokenizerFactory that vends CoreLabel tokens with default tokenization
 */
public static TokenizerFactory<CoreLabel> coreLabelFactory() {
  final String defaultOptions = "";
  return coreLabelFactory(defaultOptions);
}
/**
 * Provides the {@link PTBTokenizer}-based tokenizer factory.
 *
 * @return the result of {@code PTBTokenizer.coreLabelFactory()}: a factory (not a tokenizer
 *     itself) whose tokenizers emit {@link CoreLabel} tokens
 */
@Override
public TokenizerFactory<CoreLabel> getTokenizerFactory() {
  final TokenizerFactory<CoreLabel> factory = PTBTokenizer.coreLabelFactory();
  return factory;
}
/**
 * Returns a tokenizer factory created by {@link PTBTokenizer}.
 *
 * @return a {@link CoreLabel}-producing tokenizer factory, as returned by
 *     {@code PTBTokenizer.coreLabelFactory()}
 */
@Override
public TokenizerFactory<CoreLabel> getTokenizerFactory() {
  final TokenizerFactory<CoreLabel> coreLabelFactory = PTBTokenizer.coreLabelFactory();
  return coreLabelFactory;
}
/**
 * Hands out the tokenizer factory for {@link PTBTokenizer}.
 *
 * @return the {@link CoreLabel} tokenizer factory that {@code PTBTokenizer.coreLabelFactory()}
 *     returns
 */
@Override
public TokenizerFactory<CoreLabel> getTokenizerFactory() {
  final TokenizerFactory<CoreLabel> tokenizerFactory = PTBTokenizer.coreLabelFactory();
  return tokenizerFactory;
}
/**
 * Builds the default-tokenization factory for {@link CoreLabel} tokens.
 *
 * <p>Equivalent to calling {@code coreLabelFactory(String)} with an empty options string.
 *
 * @return a PTBTokenizerFactory that vends CoreLabel tokens with default tokenization
 */
public static TokenizerFactory<CoreLabel> coreLabelFactory() {
  final String noOptions = "";
  return coreLabelFactory(noOptions);
}
/**
 * Convenience overload that requests a {@link CoreLabel} tokenizer factory without any extra
 * options; it passes an empty options string to {@code coreLabelFactory(String)}.
 *
 * @return a PTBTokenizerFactory that vends CoreLabel tokens with default tokenization
 */
public static TokenizerFactory<CoreLabel> coreLabelFactory() {
  final String emptyOptions = "";
  return coreLabelFactory(emptyOptions);
}
public static List<CoreLabel> stanfordTokenize(String str) { TokenizerFactory<? extends HasWord> tf = PTBTokenizer.coreLabelFactory(); // ptb3Escaping=false -> '(' not converted as '-LRB-', Dont use it, it will cause Dependency resolution err. Tokenizer<? extends HasWord> originalWordTokenizer = tf.getTokenizer(new StringReader(str), "ptb3Escaping=false"); Tokenizer<? extends HasWord> tokenizer = tf.getTokenizer(new StringReader(str)); List<? extends HasWord> originalTokens = originalWordTokenizer.tokenize(); List<? extends HasWord> tokens = tokenizer.tokenize(); // Curse you Stanford! List<CoreLabel> coreLabels = new ArrayList<>(tokens.size()); for (int i = 0; i < tokens.size(); i++) { CoreLabel coreLabel = new CoreLabel(); coreLabel.setWord(tokens.get(i).word()); coreLabel.setOriginalText(originalTokens.get(i).word()); coreLabel.setValue(tokens.get(i).word()); coreLabel.setBeginPosition(((CoreLabel) tokens.get(i)).beginPosition()); coreLabel.setEndPosition(((CoreLabel) tokens.get(i)).endPosition()); coreLabels.add(coreLabel); } return coreLabels; }