public static List<Pipe> getPipes() throws Exception { List<Pipe> pipes = newArrayList(); pipes.add(new Jcas2TokenSequence()); pipes.add(new Target2LabelSequence()); // more piiiiipes addAllGoodPipes(pipes); pipes.add(new FeatureWindow(window, window)); // for debugging pipes.add(new PrintInputAndTarget()); pipes.add(new TokenSequence2FeatureVectorSequence()); return pipes; }
/**
 * Verifies the initial value of the label-likelihood objective for two CRFs built
 * over the same toy data set.
 *
 * <p>The first CRF gets its states from the labels seen in the data plus an added
 * start state; the second is an order-1 CRF whose start label is {@code "A"}.
 * Each objective value is compared to a fixed expected number with a tolerance
 * of {@code 1e-4}.
 */
public void testStartState() {
    Pattern targetThenData = Pattern.compile("^(\\S+) (.*)");
    Pipe[] stages = {
        new LineGroupString2TokenSequence(),
        new TokenSequenceMatchDataAndTarget(targetThenData, 2, 1),
        new TokenSequenceParseFeatureString(false),
        new TokenText(),
        new TokenSequence2FeatureVectorSequence(true, false),
        new Target2LabelSequence(),
        new PrintInputAndTarget(),
    };
    Pipe p = new SerialPipes(stages);

    // Feed the toy corpus through the pipe, one instance per line group.
    InstanceList data = new InstanceList(p);
    Pattern lineBreak = Pattern.compile("\n");
    data.addThruPipe(new LineGroupIterator(new StringReader(toy), lineBreak, true));

    // Scenario 1: states taken from the data's label transitions, plus a start state.
    CRF labelConnectedCrf = new CRF(p, null);
    labelConnectedCrf.print();
    labelConnectedCrf.addStatesForLabelsConnectedAsIn(data);
    labelConnectedCrf.addStartState();
    CRFTrainerByLabelLikelihood labelConnectedTrainer =
            new CRFTrainerByLabelLikelihood(labelConnectedCrf);
    Optimizable.ByGradientValue labelConnectedObjective =
            labelConnectedTrainer.getOptimizableCRF(data);
    assertEquals(-1.3862, labelConnectedObjective.getValue(), 1e-4);

    // Scenario 2: order-1 states with "A" passed as the start label.
    CRF orderOneCrf = new CRF(p, null);
    orderOneCrf.addOrderNStates(data, new int[] { 1 }, null, "A", null, null, false);
    orderOneCrf.print();
    CRFTrainerByLabelLikelihood orderOneTrainer = new CRFTrainerByLabelLikelihood(orderOneCrf);
    Optimizable.ByGradientValue orderOneObjective = orderOneTrainer.getOptimizableCRF(data);
    assertEquals(-3.09104245335831, orderOneObjective.getValue(), 1e-4);
}
/**
 * Compares the starting objective value of two differently configured CRFs
 * against fixed expected numbers (tolerance {@code 1e-4}).
 *
 * <p>Both CRFs share one pipe and one instance list built from the {@code toy}
 * string. The first uses label-connected states with an explicit start state;
 * the second uses order-1 states with {@code "A"} as the start label.
 */
public void testStartState() {
    Pattern labelAndRest = Pattern.compile("^(\\S+) (.*)");
    Pipe[] pipeStages = {
        new LineGroupString2TokenSequence(),
        new TokenSequenceMatchDataAndTarget(labelAndRest, 2, 1),
        new TokenSequenceParseFeatureString(false),
        new TokenText(),
        new TokenSequence2FeatureVectorSequence(true, false),
        new Target2LabelSequence(),
        new PrintInputAndTarget(),
    };
    Pipe p = new SerialPipes(pipeStages);

    // Load the toy data, splitting it on newlines.
    InstanceList instances = new InstanceList(p);
    Pattern newline = Pattern.compile("\n");
    instances.addThruPipe(new LineGroupIterator(new StringReader(toy), newline, true));

    // First configuration: label-connected states and an added start state.
    CRF withStartState = new CRF(p, null);
    withStartState.print();
    withStartState.addStatesForLabelsConnectedAsIn(instances);
    withStartState.addStartState();
    CRFTrainerByLabelLikelihood firstTrainer = new CRFTrainerByLabelLikelihood(withStartState);
    Optimizable.ByGradientValue firstObjective = firstTrainer.getOptimizableCRF(instances);
    assertEquals(-1.3862, firstObjective.getValue(), 1e-4);

    // Second configuration: order-1 states, start label "A".
    CRF orderOne = new CRF(p, null);
    orderOne.addOrderNStates(instances, new int[] { 1 }, null, "A", null, null, false);
    orderOne.print();
    CRFTrainerByLabelLikelihood secondTrainer = new CRFTrainerByLabelLikelihood(orderOne);
    Optimizable.ByGradientValue secondObjective = secondTrainer.getOptimizableCRF(instances);
    assertEquals(-3.09104245335831, secondObjective.getValue(), 1e-4);
}
private static Pipe makePipe() { Alphabet alpha = new Alphabet(); Target2LabelSequence labelPipe = new Target2LabelSequence(); LabelAlphabet labelAlpha = (LabelAlphabet) labelPipe.getTargetAlphabet(); return new SerialPipes(ImmutableList.of( new StringListToTokenSequence(alpha, labelAlpha), // convert to token sequence new TokenSequenceLowercase(), // make all lowercase new NeighborTokenFeature(true, makeNeighbors()), // grab neighboring graphemes new NeighborShapeFeature(true, makeShapeNeighs()), new TokenSequenceToFeature(), // convert the strings in the text to features new TokenSequence2FeatureVectorSequence(alpha, true, true), labelPipe, new LabelSequenceToLabelsAssignment(alpha, labelAlpha) )); }
private Pipe makePipe() { Alphabet alpha = new Alphabet(); Target2LabelSequence labelPipe = new Target2LabelSequence(); LabelAlphabet labelAlpha = (LabelAlphabet) labelPipe.getTargetAlphabet(); return new SerialPipes(ImmutableList.of( new StringListToTokenSequence(alpha, labelAlpha), // convert to token sequence new TokenSequenceLowercase(), // make all lowercase new NeighborTokenFeature(true, makeNeighbors()), // grab neighboring graphemes new SurroundingTokenFeature(false), // new SurroundingTokenFeature(true), new NeighborShapeFeature(true, makeShapeNeighs()), new LeadingTrailingFeature(), new TokenSequenceToFeature(), // convert the strings in the text to features new TokenSequence2FeatureVectorSequence(alpha, true, true), labelPipe )); }
/**
 * Builds an {@code Extraction} from parallel arrays of predicted and true strings.
 *
 * <p>Both arrays are run through the same pipe (predictions first, then the true
 * strings), so they share one target alphabet. For every predicted instance a
 * {@code DocumentExtraction} named {@code "TEST" + index} is created from the
 * predicted instance's data and target plus the target of the true instance at
 * the same index, using {@code "O"} as the background tag.
 *
 * @param predStrings the predicted strings, one per document
 * @param trueStrings the true strings; read at the same indices as {@code predStrings}
 * @return the extraction holding one document extraction per predicted instance
 */
private Extraction createExtractionFrom (String[] predStrings, String[] trueStrings)
{
    CharSequenceLexer lexer = new CharSequenceLexer (CharSequenceLexer.LEX_NONWHITESPACE_CLASSES);
    Pipe[] stages = {
        new SGML2TokenSequence (lexer, "O"),
        new Target2LabelSequence (),
        new PrintInputAndTarget (),
    };
    Pipe pipe = new SerialPipes (stages);

    // Predictions go through the pipe before the true strings.
    InstanceList predicted = new InstanceList (pipe);
    predicted.addThruPipe (new ArrayIterator (predStrings));
    InstanceList expected = new InstanceList (pipe);
    expected.addThruPipe (new ArrayIterator (trueStrings));

    LabelAlphabet labels = (LabelAlphabet) pipe.getTargetAlphabet ();
    Extraction result = new Extraction (null, labels);

    int docCount = predicted.size ();
    for (int docIdx = 0; docIdx < docCount; docIdx++) {
        Instance predictedInstance = predicted.get (docIdx);
        Instance expectedInstance = expected.get (docIdx);

        Tokenization tokens = (Tokenization) predictedInstance.getData ();
        Sequence predictedLabels = (Sequence) predictedInstance.getTarget ();
        Sequence expectedLabels = (Sequence) expectedInstance.getTarget ();

        result.addDocumentExtraction (
            new DocumentExtraction ("TEST"+docIdx, labels, tokens, predictedLabels, expectedLabels, "O"));
    }

    return result;
}
/**
 * Creates an {@code Extraction} that pairs each predicted document with the true
 * document at the same index.
 *
 * <p>A single pipe processes the predicted strings and then the true strings, and
 * its target alphabet is used as the label dictionary. Each document extraction is
 * named {@code "TEST" + index} and uses {@code "O"} as the background tag.
 *
 * @param predStrings the predicted strings, one per document
 * @param trueStrings the true strings; read at the same indices as {@code predStrings}
 * @return the extraction with one document extraction per predicted instance
 */
private Extraction createExtractionFrom (String[] predStrings, String[] trueStrings)
{
    CharSequenceLexer nonWhitespaceLexer =
        new CharSequenceLexer (CharSequenceLexer.LEX_NONWHITESPACE_CLASSES);
    Pipe[] pipeStages = {
        new SGML2TokenSequence (nonWhitespaceLexer, "O"),
        new Target2LabelSequence (),
        new PrintInputAndTarget (),
    };
    Pipe pipe = new SerialPipes (pipeStages);

    // Order matters for the shared pipe: predictions first, true strings second.
    InstanceList predictions = new InstanceList (pipe);
    predictions.addThruPipe (new ArrayIterator (predStrings));
    InstanceList truths = new InstanceList (pipe);
    truths.addThruPipe (new ArrayIterator (trueStrings));

    LabelAlphabet labelDict = (LabelAlphabet) pipe.getTargetAlphabet ();
    Extraction extraction = new Extraction (null, labelDict);

    int total = predictions.size ();
    for (int n = 0; n < total; n++) {
        Instance predInst = predictions.get (n);
        Instance trueInst = truths.get (n);

        Tokenization tokenization = (Tokenization) predInst.getData ();
        Sequence predictedSeq = (Sequence) predInst.getTarget ();
        Sequence trueSeq = (Sequence) trueInst.getTarget ();

        DocumentExtraction doc =
            new DocumentExtraction ("TEST"+n, labelDict, tokenization, predictedSeq, trueSeq, "O");
        extraction.addDocumentExtraction (doc);
    }

    return extraction;
}
private Pipe makePipe() { Alphabet alpha = new Alphabet(); Target2LabelSequence labelPipe = new Target2LabelSequence(); LabelAlphabet labelAlpha = (LabelAlphabet) labelPipe.getTargetAlphabet(); return new SerialPipes(ImmutableList.of( new StringListToTokenSequence(alpha, labelAlpha), // convert to token sequence new TokenSequenceLowercase(), // make all lowercase new NeighborTokenFeature(true, makeNeighbors()), // grab neighboring graphemes new SurroundingTokenFeature(false), new SurroundingTokenFeature(true), new NeighborShapeFeature(true, makeShapeNeighs()), new LeadingTrailingFeature(), new TokenSequenceToFeature(), // convert the strings in the text to features new TokenSequence2FeatureVectorSequence(alpha, true, false), labelPipe )); }
private Pipe makePipe() { Alphabet alpha = new Alphabet(); Target2LabelSequence labelPipe = new Target2LabelSequence(); LabelAlphabet labelAlpha = (LabelAlphabet) labelPipe.getTargetAlphabet(); return new SerialPipes(ImmutableList.of( new StringListToTokenSequence(alpha, labelAlpha), // convert to token sequence new TokenSequenceLowercase(), // make all lowercase new NeighborTokenFeature(true, makeNeighbors()), // grab neighboring graphemes new SurroundingTokenFeature(false), new SurroundingTokenFeature(true), new NeighborShapeFeature(true, makeShapeNeighs()), new LeadingTrailingFeature(), new TokenSequenceToFeature(), // convert the strings in the text to features new TokenSequence2FeatureVectorSequence(alpha, true, false), labelPipe )); }
private Pipe makePipe() { Alphabet alpha = new Alphabet(); Target2LabelSequence labelPipe = new Target2LabelSequence(); LabelAlphabet labelAlpha = (LabelAlphabet) labelPipe.getTargetAlphabet(); return new SerialPipes(ImmutableList.of( new SWordConverterPipe(), new StringListToTokenSequence(alpha, labelAlpha), // convert to token sequence new TokenSequenceLowercase(), // make all lowercase new PhoneNeighborPipe(true, makeNeighbors()), // grab neighboring graphemes new PhoneClassPipe(true, makeClassNeighbors()), new VowelNeighborPipe(), // new SurroundingTokenFeature(false), // new SurroundingTokenFeature(true), // new NeighborShapeFeature(true, makeShapeNeighs()), new IsFirstPipe(), new ThisPhoneClassPipe(), // new AppendEndPipe(), // right before TS2F to get text set, last not to mess w neighbors new TokenSequenceToFeature(), // convert the strings in the text to features new TokenSequence2FeatureVectorSequence(alpha, true, false), labelPipe )); }
private SerialPipes makePipe(Alphabet alpha) { Target2LabelSequence labelPipe = new Target2LabelSequence(); LabelAlphabet labelAlpha = (LabelAlphabet) labelPipe.getTargetAlphabet();