@Override public Tree next() { if (line == null) { throw new NoSuchElementException(); } Reader lineReader = new StringReader(line); line = null; List<Word> words; if (tokenized) { words = WhitespaceTokenizer.newWordWhitespaceTokenizer(lineReader).tokenize(); } else { words = PTBTokenizer.newPTBTokenizer(lineReader).tokenize(); } if (!words.isEmpty()) { // the parser throws an exception if told to parse an empty sentence. Tree parseTree = lp.apply(words); return parseTree; } else { return new SimpleTree(); } }
// NOTE(review): fragment — `line`, `lp`, and `tb` are declared elsewhere in the file;
// presumably `tb` is a Treebank collecting results — confirm against surrounding code.
System.out.println("Processing sentence: " + line);
// Tokenize the raw sentence with the Penn Treebank tokenizer.
PTBTokenizer<Word> ptb = PTBTokenizer.newPTBTokenizer(new StringReader(line));
List<Word> words = ptb.tokenize();
// Parse the token list and add the resulting tree to the collection.
Tree parseTree = lp.parseTree(words);
tb.add(parseTree);
// Fragment: drain the tokenizer into a list of CoreLabels (`ptb` is created elsewhere).
List<CoreLabel> words = ptb.tokenize();
// Tokenize `text` without PTB escaping (raw characters kept: ptb3Escaping=false),
// split the token stream into sentences, and render each sentence back to a string.
// NOTE(review): raw PTBTokenizer type here — unchecked; consider PTBTokenizer<CoreLabel>.
PTBTokenizer ptbt = new PTBTokenizer(
    new StringReader(text), new CoreLabelTokenFactory(), "ptb3Escaping=false");
List<List<CoreLabel>> sents = (new WordToSentenceProcessor()).process(ptbt.tokenize());
Vector<String> sentences = new Vector<String>();
for (List<CoreLabel> sent : sents) {
  StringBuilder sb = new StringBuilder("");
  // Joins tokens with single spaces via CoreLabel.toString(); leaves a trailing space.
  for (CoreLabel w : sent) sb.append(w + " ");
  sentences.add(sb.toString());
}
} // NOTE(review): closes an enclosing scope that begins outside this snippet.
@Override public Tree next() { if (line == null) { throw new NoSuchElementException(); } Reader lineReader = new StringReader(line); line = null; List<Word> words; if (tokenized) { words = WhitespaceTokenizer.newWordWhitespaceTokenizer(lineReader).tokenize(); } else { words = PTBTokenizer.newPTBTokenizer(lineReader).tokenize(); } if (!words.isEmpty()) { // the parser throws an exception if told to parse an empty sentence. Tree parseTree = lp.apply(words); return parseTree; } else { return new SimpleTree(); } }
/**
 * Tokenizes the given string with the PTB tokenizer.
 *
 * <p>If PTB tokenization throws, logs the message and falls back to splitting
 * the output of {@code pennTokenizer} on whitespace.
 *
 * @param string the raw text to tokenize
 * @return the PTB token list, or the whitespace-split fallback on failure
 */
public List<Word> tokenize(String string) {
  this.tokenizer = new PTBTokenizer<Word>(
      new StringReader(string),
      new WordTokenFactory(),
      "untokenizable=noneDelete,ptb3Escaping=true");
  try {
    return tokenizer.tokenize();
  } catch (Exception e) {
    // Best-effort fallback: report and degrade to whitespace tokenization.
    System.err.println(e.getMessage());
    final List<Word> fallback = new ArrayList<Word>();
    String[] pieces = pennTokenizer.tokenize(string).split("\\s+");
    for (String piece : pieces) {
      fallback.add(new Word(piece));
    }
    return fallback;
  }
}
@Override public Tree next() { if (line == null) { throw new NoSuchElementException(); } Reader lineReader = new StringReader(line); line = null; List<Word> words; if (tokenized) { words = WhitespaceTokenizer.newWordWhitespaceTokenizer(lineReader).tokenize(); } else { words = PTBTokenizer.newPTBTokenizer(lineReader).tokenize(); } if (!words.isEmpty()) { // the parser throws an exception if told to parse an empty sentence. Tree parseTree = lp.apply(words); return parseTree; } else { return new SimpleTree(); } }
// Fragment: tokenize, then guard against an empty sentence before parsing
// (the parser throws on empty input). The brace is closed outside this snippet.
List<Word> words = ptb.tokenize(); if (!words.isEmpty()) {
@Override public Tree next() { if (line == null) { throw new NoSuchElementException(); } Reader lineReader = new StringReader(line); line = null; List<Word> words; if (tokenized) { words = WhitespaceTokenizer.newWordWhitespaceTokenizer(lineReader).tokenize(); } else { words = PTBTokenizer.newPTBTokenizer(lineReader).tokenize(); } if (!words.isEmpty()) { // the parser throws an exception if told to parse an empty sentence. Tree parseTree = lp.apply(words); return parseTree; } else { return new SimpleTree(); } }
// NOTE(review): fragment — `line` and `lp` are declared elsewhere in the file.
System.out.println("Processing sentence: " + line);
// Tokenize the raw sentence with the Penn Treebank tokenizer.
PTBTokenizer<Word> ptb = PTBTokenizer.newPTBTokenizer(new StringReader(line));
List<Word> words = ptb.tokenize();
// Two-step parser API: run the parse, then fetch the best tree separately.
lp.parse(words);
Tree parseTree = lp.getBestParse();