/**
 * Tokenizes a date string and stores the resulting token strings in {@code tokens}.
 *
 * <p>Before tokenizing, every hyphen is surrounded by spaces and every comma is replaced by a
 * space. {@code tokens} is always reset to a fresh list first.
 *
 * @param inputDate the date text to tokenize; when {@code null}, a message is printed and
 *     {@code tokens} is left empty
 */
private void tokenizeDate(String inputDate) {
    tokens = new ArrayList<>();
    if (inputDate == null) {
        System.out.println("Null input date");
        // Stop here: continuing would dereference the null input and throw.
        return;
    }

    // Literal replacements; no regex is needed for single fixed characters.
    String str = inputDate.replace("-", " - ").replace(",", " ");

    PTBTokenizer<Word> tokenizer =
            PTBTokenizer.newPTBTokenizer(new BufferedReader(new StringReader(str)));
    while (tokenizer.hasNext()) {
        Word nextToken = tokenizer.next();
        tokens.add(nextToken.toString());
    }

    if (DEBUG) {
        System.out.println("tokens:" + tokens);
    }
}
CoreLabel obj = tokenizer.next();
/**
 * Breaks a date string into tokens and assigns them to {@code tokens}.
 *
 * <p>Hyphens are padded with a space on each side and commas are turned into spaces before the
 * text is passed to the PTB tokenizer.
 *
 * @param inputDate text to tokenize; a {@code null} value prints a message and leaves
 *     {@code tokens} as an empty list
 */
private void tokenizeDate(String inputDate) {
    tokens = new ArrayList<String>();
    if (inputDate == null) {
        System.out.println("Null input date");
        // Without this return the code below would throw NullPointerException.
        return;
    }

    // Fixed-character substitutions, so plain String.replace is sufficient.
    String str = inputDate.replace("-", " - ").replace(",", " ");

    PTBTokenizer<Word> tokenizer =
            PTBTokenizer.newPTBTokenizer(new BufferedReader(new StringReader(str)));
    while (tokenizer.hasNext()) {
        Word nextToken = tokenizer.next();
        tokens.add(nextToken.toString());
    }

    if (DEBUG) {
        System.out.println("tokens:" + tokens);
    }
}
/**
 * Populates {@code tokens} with the tokens of the given date string.
 *
 * <p>The input is normalized first: each hyphen becomes {@code " - "} and each comma becomes a
 * single space. The normalized text is then run through the PTB tokenizer.
 *
 * @param inputDate the date string; if {@code null}, "Null input date" is printed and
 *     {@code tokens} stays empty
 */
private void tokenizeDate(String inputDate) {
    tokens = new ArrayList<String>();
    if (inputDate == null) {
        System.out.println("Null input date");
        // Return early; the null input cannot be normalized or tokenized.
        return;
    }

    // Single-character literal replacements; a compiled regex is unnecessary.
    String str = inputDate.replace("-", " - ").replace(",", " ");

    PTBTokenizer<Word> tokenizer =
            PTBTokenizer.newPTBTokenizer(new BufferedReader(new StringReader(str)));
    while (tokenizer.hasNext()) {
        Word nextToken = tokenizer.next();
        tokens.add(nextToken.toString());
    }

    if (DEBUG) {
        System.out.println("tokens:" + tokens);
    }
}
/**
 * Resets {@code tokens} and fills it with the tokens found in a date string.
 *
 * <p>Each hyphen in the input is rewritten as {@code " - "} and each comma as a space; the
 * result is tokenized with the PTB tokenizer and each token's string form is appended.
 *
 * @param inputDate the text to tokenize; for {@code null} a message is printed and no tokens
 *     are added
 */
private void tokenizeDate(String inputDate) {
    tokens = new ArrayList<>();
    if (inputDate == null) {
        System.out.println("Null input date");
        // Bail out so the null reference is never dereferenced below.
        return;
    }

    // Plain literal replacement is enough for these fixed characters.
    String str = inputDate.replace("-", " - ").replace(",", " ");

    PTBTokenizer<Word> tokenizer =
            PTBTokenizer.newPTBTokenizer(new BufferedReader(new StringReader(str)));
    while (tokenizer.hasNext()) {
        Word nextToken = tokenizer.next();
        tokens.add(nextToken.toString());
    }

    if (DEBUG) {
        System.out.println("tokens:" + tokens);
    }
}
/**
 * Tokenizes a sentence with the PTB tokenizer and prefixes the result with the ROOT marker.
 *
 * @param sentence the text to tokenize
 * @return an array whose first element is {@code is2.io.CONLLReader09.ROOT}, followed by the
 *     word of each token in the order produced by the tokenizer
 */
@Override
public String[] tokenize(String sentence) {
    PTBTokenizer<Word> ptb = PTBTokenizer.newPTBTokenizer(new StringReader(sentence));

    // Seed the list with ROOT so the result array can be produced in one step.
    List<String> words = new ArrayList<String>();
    words.add(is2.io.CONLLReader09.ROOT);
    while (ptb.hasNext()) {
        words.add(ptb.next().word());
    }
    return words.toArray(new String[words.size()]);
}
/**
 * Splits a sentence into PTB tokens and returns their words behind a leading ROOT entry.
 *
 * @param sentence the text to tokenize
 * @return an array of length {@code tokenCount + 1}: index 0 holds
 *     {@code is2.io.CONLLReader09.ROOT}, the remaining slots hold the token words in order
 */
@Override
public String[] tokenize(String sentence) {
    List<String> words = new ArrayList<>();
    PTBTokenizer<Word> ptb = PTBTokenizer.newPTBTokenizer(new StringReader(sentence));
    while (ptb.hasNext()) {
        words.add(ptb.next().word());
    }

    // Slot 0 is reserved for ROOT; token words are shifted right by one.
    String[] result = new String[words.size() + 1];
    result[0] = is2.io.CONLLReader09.ROOT;
    for (int idx = 0; idx < words.size(); idx++) {
        result[idx + 1] = words.get(idx);
    }
    return result;
}
/**
 * Tokenizes a sentence and returns each token with its begin/end positions offset by
 * {@code startpos}, preceded by a ROOT entry at position (0, 0).
 *
 * <p>After tokenizing, {@code startpos} is advanced by the sentence length plus one.
 *
 * @param sentence the text to tokenize
 * @return an array whose first element is the ROOT entry, followed by one entry per token
 */
public StringInText[] tokenizeplus(String sentence) {
    PTBTokenizer<Word> ptb = PTBTokenizer.newPTBTokenizer(new StringReader(sentence));

    List<StringInText> spans = new ArrayList<>();
    while (ptb.hasNext()) {
        Word token = ptb.next();
        // Token positions are relative to this sentence; startpos shifts them.
        spans.add(new StringInText(
                token.word(),
                token.beginPosition() + startpos,
                token.endPosition() + startpos));
    }

    // Index 0 is the ROOT entry; tokens follow shifted by one.
    StringInText[] result = new StringInText[spans.size() + 1];
    result[0] = new StringInText(is2.io.CONLLReader09.ROOT, 0, 0);
    for (int idx = 0; idx < spans.size(); idx++) {
        result[idx + 1] = spans.get(idx);
    }

    startpos += sentence.length() + 1;
    return result;
}
CoreLabel obj = tokenizer.next();
CoreLabel obj = tokenizer.next();
CoreLabel obj = tokenizer.next();
boolean beginLine = true; while (tokenizer.hasNext()) { CoreLabel obj = tokenizer.next(); String str = obj.get(TextAnnotation.class); if (lowerCase) {
public DataBag exec(Tuple input) throws IOException { if (input == null || input.size() < 1 || input.isNull(0)) return null; // Output bag DataBag bagOfTokens = bagFactory.newDefaultBag(); StringReader textInput = new StringReader(input.get(0).toString()); PTBTokenizer ptbt = new PTBTokenizer(textInput, new CoreLabelTokenFactory(), ""); for (CoreLabel label; ptbt.hasNext(); ) { label = (CoreLabel)ptbt.next(); Tuple termText = tupleFactory.newTuple(label.toString()); bagOfTokens.add(termText); } return bagOfTokens; } }