/**
 * Removes single-space tokens from the carrier's tokenization and labels each
 * remaining token by whether it begins a new space-delimited segment.
 *
 * <p>The carrier's data is read as a {@code StringTokenization}. A new
 * tokenization over the same document is built from every span whose text is
 * not exactly {@code " "}. Each kept span is labelled {@code "start"} when it
 * is the first span or the span before it was a space, and {@code "notstart"}
 * otherwise.
 *
 * <p>On return the carrier's data is the filtered tokenization and its source
 * is the concatenation of the kept spans' text. The target is replaced by the
 * label sequence only when {@code isTargetProcessing()} is true.
 *
 * @param carrier instance whose data is a {@code StringTokenization}; it is
 *                modified in place
 * @return the same {@code carrier} instance
 */
public Instance pipe(Instance carrier) {
    StringTokenization ts = (StringTokenization) carrier.getData();
    StringTokenization newTs = new StringTokenization((CharSequence) ts.getDocument());

    final LabelAlphabet dict = (LabelAlphabet) getTargetAlphabet();
    LabelSequence labelSeq = new LabelSequence(dict);

    // The returned Label objects are not needed, but the calls are kept:
    // presumably lookupLabel registers missing entries, so both labels end up
    // in the alphabet in a fixed order even for empty input.
    // NOTE(review): confirm against LabelAlphabet before removing.
    dict.lookupLabel("start");
    dict.lookupLabel("notstart");

    // True initially so that the very first kept token is labelled "start".
    boolean lastWasSpace = true;
    // Method-local buffer, so the unsynchronized StringBuilder is sufficient.
    StringBuilder sb = new StringBuilder();

    for (int i = 0; i < ts.size(); i++) {
        StringSpan t = (StringSpan) ts.getSpan(i);
        if (t.getText().equals(" ")) {
            // Space tokens are dropped; they only mark the next token as a start.
            lastWasSpace = true;
        } else {
            sb.append(t.getText());
            newTs.add(t);
            labelSeq.add(lastWasSpace ? "start" : "notstart");
            lastWasSpace = false;
        }
    }

    if (isTargetProcessing()) {
        carrier.setTarget(labelSeq);
    }
    carrier.setData(newTs);
    carrier.setSource(sb.toString());
    return carrier;
}
/**
 * Removes single-space tokens from the carrier's tokenization and labels each
 * remaining token by whether it begins a new space-delimited segment.
 *
 * <p>The carrier's data is read as a {@code StringTokenization}. A new
 * tokenization over the same document is built from every span whose text is
 * not exactly {@code " "}. Each kept span is labelled {@code "start"} when it
 * is the first span or the span before it was a space, and {@code "notstart"}
 * otherwise.
 *
 * <p>On return the carrier's data is the filtered tokenization and its source
 * is the concatenation of the kept spans' text. The target is replaced by the
 * label sequence only when {@code isTargetProcessing()} is true.
 *
 * @param carrier instance whose data is a {@code StringTokenization}; it is
 *                modified in place
 * @return the same {@code carrier} instance
 */
public Instance pipe(Instance carrier) {
    StringTokenization ts = (StringTokenization) carrier.getData();
    StringTokenization newTs = new StringTokenization((CharSequence) ts.getDocument());

    final LabelAlphabet dict = (LabelAlphabet) getTargetAlphabet();
    LabelSequence labelSeq = new LabelSequence(dict);

    // The returned Label objects are not needed, but the calls are kept:
    // presumably lookupLabel registers missing entries, so both labels end up
    // in the alphabet in a fixed order even for empty input.
    // NOTE(review): confirm against LabelAlphabet before removing.
    dict.lookupLabel("start");
    dict.lookupLabel("notstart");

    // True initially so that the very first kept token is labelled "start".
    boolean lastWasSpace = true;
    // Method-local buffer, so the unsynchronized StringBuilder is sufficient.
    StringBuilder sb = new StringBuilder();

    for (int i = 0; i < ts.size(); i++) {
        StringSpan t = (StringSpan) ts.getSpan(i);
        if (t.getText().equals(" ")) {
            // Space tokens are dropped; they only mark the next token as a start.
            lastWasSpace = true;
        } else {
            sb.append(t.getText());
            newTs.add(t);
            labelSeq.add(lastWasSpace ? "start" : "notstart");
            lastWasSpace = false;
        }
    }

    if (isTargetProcessing()) {
        carrier.setTarget(labelSeq);
    }
    carrier.setData(newTs);
    carrier.setSource(sb.toString());
    return carrier;
}