/**
 * Builds the processing pipeline as a {@code SerialPipes} over four stages,
 * applied in the order they are listed below.
 *
 * @return a new {@code SerialPipes} wrapping the four stages
 */
public Pipe createPipe() {
    Pipe[] stages = {
        new CharSequence2TokenSequence(),
        new TokenSequenceLowercase(),
        new TokenSequence2FeatureSequence(),
        new FeatureSequence2FeatureVector(),
    };
    return new SerialPipes(stages);
}
/**
 * Builds the processing pipeline as a {@code SerialPipes} over four stages,
 * applied in the order they are listed below.
 *
 * @return a new {@code SerialPipes} wrapping the four stages
 */
public Pipe createPipe() {
    Pipe[] stages = {
        new CharSequence2TokenSequence(),
        new TokenSequenceLowercase(),
        new TokenSequence2FeatureSequence(),
        new FeatureSequence2FeatureVector(),
    };
    return new SerialPipes(stages);
}
(Pipe) new TargetStringToFeatures(), (Pipe) new CharSequence2TokenSequence(), (Pipe) new TokenSequenceLowercase(), (Pipe) new TokenSequenceRemoveStopwords(false, false), (Pipe) new TokenSequence2FeatureSequence()
(Pipe) new TargetStringToFeatures(), (Pipe) new CharSequence2TokenSequence(), (Pipe) new TokenSequenceLowercase(), (Pipe) new TokenSequenceRemoveStopwords(false, false), (Pipe) new TokenSequence2FeatureSequence()
(Pipe) new TargetStringToFeatures(), (Pipe) new CharSequence2TokenSequence(), (Pipe) new TokenSequenceLowercase(), (Pipe) new TokenSequenceRemoveStopwords(false, false), (Pipe) new TokenSequence2FeatureSequence()
/**
 * Creates an {@code InstanceList} backed by a six-stage {@code SerialPipes}
 * and feeds it from a {@code FileIterator} rooted at {@code foo/bar}.
 *
 * The method makes no assertions of its own; it only exercises construction
 * and {@code addThruPipe}.
 */
public void testThree() {
    Pipe[] stages = {
        new Target2Label(),
        new CharSequence2TokenSequence(),
        new TokenSequenceLowercase(),
        new TokenSequenceRemoveStopwords(),
        new TokenSequence2FeatureSequence(),
        new FeatureSequence2FeatureVector(),
    };
    InstanceList il = new InstanceList(new SerialPipes(stages));

    // The pattern captures everything before the first '/'.
    // NOTE(review): presumably FileIterator uses that capture as the target - confirm.
    File root = new File("foo/bar");
    Pattern leadingPathComponent = Pattern.compile("^([^/]*)/");
    Iterator<Instance> pi = new FileIterator(root, null, leadingPathComponent);

    il.addThruPipe(pi);
}
/**
 * Creates an {@code InstanceList} backed by a six-stage {@code SerialPipes}
 * and feeds it from a {@code FileIterator} rooted at {@code foo/bar}.
 *
 * The method makes no assertions of its own; it only exercises construction
 * and {@code addThruPipe}.
 */
public void testThree() {
    Pipe[] stages = {
        new Target2Label(),
        new CharSequence2TokenSequence(),
        new TokenSequenceLowercase(),
        new TokenSequenceRemoveStopwords(),
        new TokenSequence2FeatureSequence(),
        new FeatureSequence2FeatureVector(),
    };
    InstanceList il = new InstanceList(new SerialPipes(stages));

    // The pattern captures everything before the first '/'.
    // NOTE(review): presumably FileIterator uses that capture as the target - confirm.
    File root = new File("foo/bar");
    Pattern leadingPathComponent = Pattern.compile("^([^/]*)/");
    Iterator<Instance> pi = new FileIterator(root, null, leadingPathComponent);

    il.addThruPipe(pi);
}
public void testSpacePipe () { Pipe p = new SerialPipes (new Pipe[] { new CharSequence2TokenSequence ("."), new TokenSequenceLowercase (), new TestCRF.TestCRFTokenSequenceRemoveSpaces (), new TokenText (), new OffsetConjunctions (false, new int[][] {{0}, {1},{-1,0},{0,1}, {-2,-1,0}, {0,1,2}, {-3,-2,-1}, {1,2,3}, }), new PrintInputAndTarget(), }); // Print to a string ByteArrayOutputStream out = new ByteArrayOutputStream (); PrintStream oldOut = System.out; System.setOut (new PrintStream (out)); InstanceList lst = new InstanceList (p); lst.addThruPipe (new ArrayIterator (new String[] { TestCRF.data[0], TestCRF.data[1], })); System.setOut (oldOut); assertEquals (spacePipeOutput, out.toString()); }
public static Pipe makeSpacePredictionPipe () { Pipe p = new SerialPipes(new Pipe[]{ new CharSequence2TokenSequence("."), new TokenSequenceLowercase(), new TestMEMMTokenSequenceRemoveSpaces(), new TokenText(), new OffsetConjunctions(true, new int[][]{//{0}, /*{1},{-1,0},{0,1}, */ {1}, {-1, 0}, {0, 1}, // {-2, -1, 0}, {0, 1, 2}, {-3, -2, -1}, {1, 2, 3}, //{-2,-1}, {-1,0}, {0,1}, {1,2}, //{-3,-2,-1}, {-2,-1,0}, {-1,0,1}, {0,1,2}, {1,2,3}, }), // new PrintInputAndTarget(), new TokenSequence2FeatureVectorSequence() }); return p; }
public void testSpacePipe () { Pipe p = new SerialPipes (new Pipe[] { new CharSequence2TokenSequence ("."), new TokenSequenceLowercase (), new TestCRF.TestCRFTokenSequenceRemoveSpaces (), new TokenText (), new OffsetConjunctions (false, new int[][] {{0}, {1},{-1,0},{0,1}, {-2,-1,0}, {0,1,2}, {-3,-2,-1}, {1,2,3}, }), new PrintInputAndTarget(), }); // Print to a string ByteArrayOutputStream out = new ByteArrayOutputStream (); PrintStream oldOut = System.out; System.setOut (new PrintStream (out)); InstanceList lst = new InstanceList (p); lst.addThruPipe (new ArrayIterator (new String[] { TestCRF.data[0], TestCRF.data[1], })); System.setOut (oldOut); assertEquals (spacePipeOutput, out.toString()); }
public static Pipe makeSpacePredictionPipe () { Pipe p = new SerialPipes(new Pipe[]{ new CharSequence2TokenSequence("."), new TokenSequenceLowercase(), new TestMEMMTokenSequenceRemoveSpaces(), new TokenText(), new OffsetConjunctions(true, new int[][]{//{0}, /*{1},{-1,0},{0,1}, */ {1}, {-1, 0}, {0, 1}, // {-2, -1, 0}, {0, 1, 2}, {-3, -2, -1}, {1, 2, 3}, //{-2,-1}, {-1,0}, {0,1}, {1,2}, //{-3,-2,-1}, {-2,-1,0}, {-1,0,1}, {0,1,2}, {1,2,3}, }), // new PrintInputAndTarget(), new TokenSequence2FeatureVectorSequence() }); return p; }
private Pipe makeSpacePredictionPipe() { Pipe p = new SerialPipes(new Pipe[] { new CharSequence2TokenSequence("."), new TokenSequenceLowercase(), new TestCRFTokenSequenceRemoveSpaces(), new TokenText(), new OffsetConjunctions(true, new int[][] { { 0 }, { 1 }, { -1, 0 }, // Original test had this conjunction in it too // {1},{-1,0},{0,1}, // {0, 1}, // I'd like to comment out this next line to make it run // faster, but then we'd need to adjust likelihood and // accuracy test values. -akm 12/2007 // TODO uncomment this line // {-2, -1, 0}, {0, 1, 2}, {-3, -2, -1}, {1, 2, 3}, // (These were commented before...) // {-2,-1}, {-1,0}, {0,1}, {1,2}, // {-3,-2,-1}, {-2,-1,0}, {-1,0,1}, {0,1,2}, {1,2,3}, }), // new PrintInputAndTarget(), new TokenSequence2FeatureVectorSequence() }); return p; }
private Pipe makeSpacePredictionPipe() { Pipe p = new SerialPipes(new Pipe[] { new CharSequence2TokenSequence("."), new TokenSequenceLowercase(), new TestCRFTokenSequenceRemoveSpaces(), new TokenText(), new OffsetConjunctions(true, new int[][] { { 0 }, { 1 }, { -1, 0 }, // Original test had this conjunction in it too // {1},{-1,0},{0,1}, // {0, 1}, // I'd like to comment out this next line to make it run // faster, but then we'd need to adjust likelihood and // accuracy test values. -akm 12/2007 // TODO uncomment this line // {-2, -1, 0}, {0, 1, 2}, {-3, -2, -1}, {1, 2, 3}, // (These were commented before...) // {-2,-1}, {-1,0}, {0,1}, {1,2}, // {-3,-2,-1}, {-2,-1,0}, {-1,0,1}, {0,1,2}, {1,2,3}, }), // new PrintInputAndTarget(), new TokenSequence2FeatureVectorSequence() }); return p; }
/**
 * Pipes {@code data} through a pipeline of five {@code RegexMatches} stages
 * ("vowel", "firsthalf", "secondhalf", "length2", "length3") followed by
 * {@code PrintInput} and {@code TokenSequence2TokenInstances}, then checks
 * that the resulting {@code InstanceList} holds exactly 19 instances.
 *
 * The size check is a single JUnit assertion that carries the actual size in
 * its message. It therefore runs and reports the same way whether or not the
 * JVM was started with {@code -ea}.
 */
public void testTwo() {
    Pipe p = new SerialPipes(new Pipe[] {
        new CharSequence2TokenSequence(),
        new TokenSequenceLowercase(),
        new RegexMatches("vowel", Pattern.compile("[aeiou]")),
        new RegexMatches("firsthalf", Pattern.compile("[a-m]")),
        new RegexMatches("secondhalf", Pattern.compile("[n-z]")),
        new RegexMatches("length2", Pattern.compile("..")),
        new RegexMatches("length3", Pattern.compile("...")),
        new PrintInput(),
        new TokenSequence2TokenInstances(),
    });

    InstanceList ilist = new InstanceList(p);
    ilist.addThruPipe(new StringArrayIterator(data));

    int size = ilist.size();
    assertTrue("list size = " + size, size == 19);
}
private static Pipe makePipe() { Alphabet alpha = new Alphabet(); JointInputToTokenSequence inputPipe = new JointInputToTokenSequence(alpha, new LabelAlphabet(), new LabelAlphabet()); return new SerialPipes(ImmutableList.of( inputPipe, new TokenSequenceLowercase(), // make all lowercase new NeighborTokenFeature(true, makeNeighbors()), // grab neighboring graphemes new NeighborShapeFeature(true, makeShapeNeighs()), new TokenSequenceToFeature(), // convert the strings in the text to features new TokenSequence2FeatureVectorSequence(alpha, true, true) )); }
/**
 * Pipes {@code data} through a pipeline of five {@code RegexMatches} stages
 * ("vowel", "firsthalf", "secondhalf", "length2", "length3") followed by
 * {@code PrintInput} and {@code TokenSequence2TokenInstances}, then checks
 * that the resulting {@code InstanceList} holds exactly 19 instances.
 *
 * The size check is a single JUnit assertion that carries the actual size in
 * its message. It therefore runs and reports the same way whether or not the
 * JVM was started with {@code -ea}.
 */
public void testTwo() {
    Pipe p = new SerialPipes(new Pipe[] {
        new CharSequence2TokenSequence(),
        new TokenSequenceLowercase(),
        new RegexMatches("vowel", Pattern.compile("[aeiou]")),
        new RegexMatches("firsthalf", Pattern.compile("[a-m]")),
        new RegexMatches("secondhalf", Pattern.compile("[n-z]")),
        new RegexMatches("length2", Pattern.compile("..")),
        new RegexMatches("length3", Pattern.compile("...")),
        new PrintInput(),
        new TokenSequence2TokenInstances(),
    });

    InstanceList ilist = new InstanceList(p);
    ilist.addThruPipe(new StringArrayIterator(data));

    int size = ilist.size();
    assertTrue("list size = " + size, size == 19);
}
new TokenSequenceLowercase(), new TokenSequenceRemoveStopwords(), new TokenSequence2FeatureSequence(),
new TokenSequenceLowercase(), new TokenSequenceRemoveStopwords(), new TokenSequence2FeatureSequence(),
private static Pipe makePipe() { Alphabet alpha = new Alphabet(); Target2LabelSequence labelPipe = new Target2LabelSequence(); LabelAlphabet labelAlpha = (LabelAlphabet) labelPipe.getTargetAlphabet(); return new SerialPipes(ImmutableList.of( new StringListToTokenSequence(alpha, labelAlpha), // convert to token sequence new TokenSequenceLowercase(), // make all lowercase new NeighborTokenFeature(true, makeNeighbors()), // grab neighboring graphemes new NeighborShapeFeature(true, makeShapeNeighs()), new TokenSequenceToFeature(), // convert the strings in the text to features new TokenSequence2FeatureVectorSequence(alpha, true, true), labelPipe, new LabelSequenceToLabelsAssignment(alpha, labelAlpha) )); }
new TokenSequenceLowercase(), new TokenSequenceRemoveStopwords(), new TokenSequence2FeatureSequence(),