/**
 * Builds a new {@code SerialPipes} from the tail of this object's pipe list, starting at the
 * first pipe accepted by the given predicate.
 *
 * @param testForStartingNewPipes predicate applied to each pipe in list order; the first pipe
 *        for which it returns true becomes the first pipe of the result
 * @return a new {@code SerialPipes} built from that pipe through the last pipe, inclusive
 * @throws IllegalArgumentException if no pipe in the list satisfies the predicate
 */
public SerialPipes newSerialPipesFromSuffix (Predicate testForStartingNewPipes) {
    // The index must advance on every miss; otherwise the scan never terminates when the
    // first pipe does not match.
    for (int i = 0; i < pipes.size(); i++) {
        if (testForStartingNewPipes.predicate(pipes.get(i))) {
            // subList's end index is exclusive, so pipes.size() is required to keep the
            // final pipe in the suffix.
            return new SerialPipes(pipes.subList(i, pipes.size()));
        }
    }
    throw new IllegalArgumentException ("No pipes in this SerialPipe satisfied starting predicate.");
}
/**
 * Builds a new {@code SerialPipes} from the tail of this object's pipe list, starting at the
 * first pipe accepted by the given predicate.
 *
 * @param testForStartingNewPipes predicate applied to each pipe in list order; the first pipe
 *        for which it returns true becomes the first pipe of the result
 * @return a new {@code SerialPipes} built from that pipe through the last pipe, inclusive
 * @throws IllegalArgumentException if no pipe in the list satisfies the predicate
 */
public SerialPipes newSerialPipesFromSuffix (Predicate testForStartingNewPipes) {
    // The index must advance on every miss; otherwise the scan never terminates when the
    // first pipe does not match.
    for (int i = 0; i < pipes.size(); i++) {
        if (testForStartingNewPipes.predicate(pipes.get(i))) {
            // subList's end index is exclusive, so pipes.size() is required to keep the
            // final pipe in the suffix.
            return new SerialPipes(pipes.subList(i, pipes.size()));
        }
    }
    throw new IllegalArgumentException ("No pipes in this SerialPipe satisfied starting predicate.");
}
/**
 * Builds a new {@code SerialPipes} from the tail of this object's pipe list, starting at the
 * first pipe accepted by the given predicate.
 *
 * @param testForStartingNewPipes predicate applied to each pipe in list order; the first pipe
 *        for which it returns true becomes the first pipe of the result
 * @return a new {@code SerialPipes} built from that pipe through the last pipe, inclusive
 * @throws IllegalArgumentException if no pipe in the list satisfies the predicate
 */
public SerialPipes newSerialPipesFromSuffix (Predicate testForStartingNewPipes) {
    // The index must advance on every miss; otherwise the scan never terminates when the
    // first pipe does not match.
    for (int i = 0; i < pipes.size(); i++) {
        if (testForStartingNewPipes.predicate(pipes.get(i))) {
            // subList's end index is exclusive, so pipes.size() is required to keep the
            // final pipe in the suffix.
            return new SerialPipes(pipes.subList(i, pipes.size()));
        }
    }
    throw new IllegalArgumentException ("No pipes in this SerialPipe satisfied starting predicate.");
}
/**
 * Pushes the given token sequences through a two-stage pipeline and collects the results.
 *
 * <p>The pipeline is a {@code TokenSequenceRemoveStopwords} (constructed with
 * {@code (false, false)}) followed by a {@code TokenSequence2FeatureSequence}.
 *
 * @param data the token sequences to process
 * @return an {@code InstanceList} populated by running {@code data} through the pipeline
 */
public InstanceList malletPreprocess(List<TokenSequence> data) {
    ArrayList<Pipe> stages = new ArrayList<>();
    stages.add(new TokenSequenceRemoveStopwords(false, false));
    stages.add(new TokenSequence2FeatureSequence());
    SerialPipes pipeline = new SerialPipes(stages);

    InstanceList processed = new InstanceList(pipeline);
    processed.addThruPipe(new ArrayIterator(data));
    return processed;
}
/**
 * Chains two pipes into a single {@code SerialPipes} and installs the combined alphabets on it.
 *
 * <p>The data and target alphabets come from {@code combinedDataDicts} and
 * {@code combinedTargetDicts}. Each alphabet is assigned to the result even when it is null;
 * the corresponding "resolved" flag is set to true only when that alphabet is non-null.
 *
 * @param p1 the first pipe in the chain
 * @param p2 the second pipe in the chain
 * @return the serial pipe wrapping {@code p1} then {@code p2}
 */
public static Pipe concatenatePipes (Pipe p1, Pipe p2) {
    Alphabet mergedData = combinedDataDicts (p1, p2);
    Alphabet mergedTarget = combinedTargetDicts (p1, p2);
    Pipe chained = new SerialPipes (new Pipe[] { p1, p2 });

    chained.dataAlphabet = mergedData;
    if (mergedData != null) {
        chained.dataAlphabetResolved = true;
    }

    chained.targetAlphabet = mergedTarget;
    if (mergedTarget != null) {
        chained.targetAlphabetResolved = true;
    }

    return chained;
}
/**
 * Chains two pipes into a single {@code SerialPipes} and installs the combined alphabets on it.
 *
 * <p>The data and target alphabets come from {@code combinedDataDicts} and
 * {@code combinedTargetDicts}. Each alphabet is assigned to the result even when it is null;
 * the corresponding "resolved" flag is set to true only when that alphabet is non-null.
 *
 * @param p1 the first pipe in the chain
 * @param p2 the second pipe in the chain
 * @return the serial pipe wrapping {@code p1} then {@code p2}
 */
public static Pipe concatenatePipes (Pipe p1, Pipe p2) {
    Alphabet mergedData = combinedDataDicts (p1, p2);
    Alphabet mergedTarget = combinedTargetDicts (p1, p2);
    Pipe chained = new SerialPipes (new Pipe[] { p1, p2 });

    chained.dataAlphabet = mergedData;
    if (mergedData != null) {
        chained.dataAlphabetResolved = true;
    }

    chained.targetAlphabet = mergedTarget;
    if (mergedTarget != null) {
        chained.targetAlphabetResolved = true;
    }

    return chained;
}
/**
 * Chains two pipes into a single {@code SerialPipes} and installs the combined alphabets on it.
 *
 * <p>The data and target alphabets come from {@code combinedDataDicts} and
 * {@code combinedTargetDicts}. Each alphabet is assigned to the result even when it is null;
 * the corresponding "resolved" flag is set to true only when that alphabet is non-null.
 *
 * @param p1 the first pipe in the chain
 * @param p2 the second pipe in the chain
 * @return the serial pipe wrapping {@code p1} then {@code p2}
 */
public static Pipe concatenatePipes (Pipe p1, Pipe p2) {
    Alphabet mergedData = combinedDataDicts (p1, p2);
    Alphabet mergedTarget = combinedTargetDicts (p1, p2);
    Pipe chained = new SerialPipes (new Pipe[] { p1, p2 });

    chained.dataAlphabet = mergedData;
    if (mergedData != null) {
        chained.dataAlphabetResolved = true;
    }

    chained.targetAlphabet = mergedTarget;
    if (mergedTarget != null) {
        chained.targetAlphabetResolved = true;
    }

    return chained;
}
/**
 * Reads the given file through a {@code Target2Label} / {@code Csv2FeatureVector} pipeline and
 * returns the resulting instances.
 *
 * @param dataFile the file to read; opened with {@code FileReader}
 * @return an {@code InstanceList} populated from a {@code DataIterator} over the file
 * @throws IOException if the file cannot be opened, or if closing the reader fails
 */
public InstanceList createInstanceList(File dataFile) throws IOException {
    InstanceList instanceList = new InstanceList(
        new SerialPipes(new Pipe[] { new Target2Label(), new Csv2FeatureVector() }));

    // try-with-resources guarantees the reader is closed even when iterating or piping throws;
    // previously an exception in addThruPipe leaked the open file handle.
    try (Reader fileReader = new FileReader(dataFile)) {
        instanceList.addThruPipe(new DataIterator(fileReader));
    }
    return instanceList;
}
/**
 * Imports every file named in {@code inputFiles.value} through the given pipes and stores the
 * resulting instances, then the alphabets, in a {@code DBInstanceStore} opened on
 * {@code outputDatabase.value}.
 *
 * <p>Each input line is matched against a three-group, tab-separated regular expression.
 * NOTE(review): the trailing {@code 3, 2, 1} arguments are presumably the group indices
 * CsvIterator uses for its fields; confirm against the CsvIterator constructor.
 *
 * @param pipes the pipes to chain, in order, into a single {@code SerialPipes}
 * @throws Exception if opening or reading an input file, or any store operation, fails
 */
public static void writeInstanceList(ArrayList<Pipe> pipes) throws Exception {
    Pipe serialPipe = new SerialPipes(pipes);
    DBInstanceStore saver = new DBInstanceStore(outputDatabase.value);

    for (String filename: inputFiles.value) {
        logger.info("importing from " + filename);

        // Scope the reader to this file so it is closed after the import (or on failure)
        // instead of leaking one open handle per input file.
        // Assumes saveInstances fully consumes the iterator before returning.
        try (FileReader input = new FileReader(filename)) {
            CsvIterator reader = new CsvIterator(input, "(.*?)\\t(.*?)\\t(.*)", 3, 2, 1);
            saver.saveInstances(serialPipe.newIteratorFrom(reader));
        }
    }

    saver.saveAlphabets(serialPipe.getDataAlphabet(), serialPipe.getTargetAlphabet());
    saver.cleanup();
}
/**
 * Imports every file named in {@code inputFiles.value} through the given pipes and stores the
 * resulting instances, then the alphabets, in a {@code DBInstanceStore} opened on
 * {@code outputDatabase.value}.
 *
 * <p>Each input line is matched against a three-group, tab-separated regular expression.
 * NOTE(review): the trailing {@code 3, 2, 1} arguments are presumably the group indices
 * CsvIterator uses for its fields; confirm against the CsvIterator constructor.
 *
 * @param pipes the pipes to chain, in order, into a single {@code SerialPipes}
 * @throws Exception if opening or reading an input file, or any store operation, fails
 */
public static void writeInstanceList(ArrayList<Pipe> pipes) throws Exception {
    Pipe serialPipe = new SerialPipes(pipes);
    DBInstanceStore saver = new DBInstanceStore(outputDatabase.value);

    for (String filename: inputFiles.value) {
        logger.info("importing from " + filename);

        // Scope the reader to this file so it is closed after the import (or on failure)
        // instead of leaking one open handle per input file.
        // Assumes saveInstances fully consumes the iterator before returning.
        try (FileReader input = new FileReader(filename)) {
            CsvIterator reader = new CsvIterator(input, "(.*?)\\t(.*?)\\t(.*)", 3, 2, 1);
            saver.saveInstances(serialPipe.newIteratorFrom(reader));
        }
    }

    saver.saveAlphabets(serialPipe.getDataAlphabet(), serialPipe.getTargetAlphabet());
    saver.cleanup();
}
public void testPipesAreStupid () { Pipe p1 = new StupidPipe (); Pipe p2 = new SimpleTaggerSentence2TokenSequence (); // initialize p2's dict p2.instanceFrom(new Instance (data, null, null, null)); Pipe serial = new SerialPipes (new Pipe[] { p1, p2 }); try { serial.getDataAlphabet (); assertTrue ("Test failed: Should have generated exception.", false); } catch (IllegalStateException e) {} }
/**
 * Runs {@code testRandomTrainedOn} once with a plain {@code InstanceList} and once with a
 * {@code PagedInstanceList} sharing the same pipe, and asserts that the two returned values
 * agree to within 0.01.
 */
public void testRandomTrained () {
    Pipe[] stages = new Pipe[] {
        new TokenSequence2FeatureSequence (),
        new FeatureSequence2FeatureVector (),
        new Target2Label()
    };
    Pipe pipeline = new SerialPipes (stages);

    // The in-memory list is built and evaluated first, then the paged one.
    InstanceList inMemory = new InstanceList (pipeline);
    double inMemoryAccuracy = testRandomTrainedOn (inMemory);

    PagedInstanceList paged = new PagedInstanceList (pipeline, 700, 200, new File("."));
    double pagedAccuracy = testRandomTrainedOn (paged);

    assertEquals (inMemoryAccuracy, pagedAccuracy, 0.01);
}
public void testPipesAreStupid () { Pipe p1 = new StupidPipe (); Pipe p2 = new SimpleTaggerSentence2TokenSequence (); // initialize p2's dict p2.instanceFrom(new Instance (data, null, null, null)); Pipe serial = new SerialPipes (new Pipe[] { p1, p2 }); try { serial.getDataAlphabet (); assertTrue ("Test failed: Should have generated exception.", false); } catch (IllegalStateException e) {} }
/**
 * Runs {@code testRandomTrainedOn} once with a plain {@code InstanceList} and once with a
 * {@code PagedInstanceList} sharing the same pipe, and asserts that the two returned values
 * agree to within 0.01.
 */
public void testRandomTrained () {
    Pipe[] stages = new Pipe[] {
        new TokenSequence2FeatureSequence (),
        new FeatureSequence2FeatureVector (),
        new Target2Label()
    };
    Pipe pipeline = new SerialPipes (stages);

    // The in-memory list is built and evaluated first, then the paged one.
    InstanceList inMemory = new InstanceList (pipeline);
    double inMemoryAccuracy = testRandomTrainedOn (inMemory);

    PagedInstanceList paged = new PagedInstanceList (pipeline, 700, 200, new File("."));
    double pagedAccuracy = testRandomTrainedOn (paged);

    assertEquals (inMemoryAccuracy, pagedAccuracy, 0.01);
}
/**
 * Command-line driver: runs every file found under the given directory through
 * {@code Input2CharSequence} and {@code CharSequenceRemoveHTML}, then prints each resulting
 * instance's data to standard error.
 *
 * @param args {@code args[0]} is the starting directory handed to {@code FileIterator}; if it
 *        is missing, a usage message is printed and the process exits with status 1
 */
public static void main(String[] args) {
    // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
    if (args == null || args.length < 1) {
        System.err.println("Usage: <html-directory>");
        System.exit(1);
        return;
    }
    String htmldir = args[0];

    Pipe pipe = new SerialPipes(new Pipe[] {
        new Input2CharSequence(),
        new CharSequenceRemoveHTML()
    });
    InstanceList list = new InstanceList(pipe);
    list.addThruPipe(new FileIterator(htmldir, FileIterator.STARTING_DIRECTORIES));

    for (int index = 0; index < list.size(); index++) {
        Instance inst = list.get(index);
        System.err.println(inst.getData());
    }
}
/**
 * Command-line driver: runs every file found under the given directory through
 * {@code Input2CharSequence} and {@code CharSequenceRemoveHTML}, then prints each resulting
 * instance's data to standard error.
 *
 * @param args {@code args[0]} is the starting directory handed to {@code FileIterator}; if it
 *        is missing, a usage message is printed and the process exits with status 1
 */
public static void main(String[] args) {
    // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
    if (args == null || args.length < 1) {
        System.err.println("Usage: <html-directory>");
        System.exit(1);
        return;
    }
    String htmldir = args[0];

    Pipe pipe = new SerialPipes(new Pipe[] {
        new Input2CharSequence(),
        new CharSequenceRemoveHTML()
    });
    InstanceList list = new InstanceList(pipe);
    list.addThruPipe(new FileIterator(htmldir, FileIterator.STARTING_DIRECTORIES));

    for (int index = 0; index < list.size(); index++) {
        Instance inst = list.get(index);
        System.err.println(inst.getData());
    }
}
/**
 * Command-line driver: runs every file found under the given directory through
 * {@code Input2CharSequence} and {@code CharSequenceRemoveHTML}, then prints each resulting
 * instance's data to standard error.
 *
 * @param args {@code args[0]} is the starting directory handed to {@code FileIterator}; if it
 *        is missing, a usage message is printed and the process exits with status 1
 */
public static void main(String[] args) {
    // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
    if (args == null || args.length < 1) {
        System.err.println("Usage: <html-directory>");
        System.exit(1);
        return;
    }
    String htmldir = args[0];

    Pipe pipe = new SerialPipes(new Pipe[] {
        new Input2CharSequence(),
        new CharSequenceRemoveHTML()
    });
    InstanceList list = new InstanceList(pipe);
    list.addThruPipe(new FileIterator(htmldir, FileIterator.STARTING_DIRECTORIES));

    for (int index = 0; index < list.size(); index++) {
        Instance inst = list.get(index);
        System.err.println(inst.getData());
    }
}
/**
 * Creates the processing pipeline: character sequence to token sequence, lowercasing,
 * token sequence to feature sequence, and feature sequence to feature vector, in that order.
 *
 * @return a new {@code SerialPipes} wrapping the four stages
 */
public Pipe createPipe () {
    Pipe[] stages = new Pipe[] {
        new CharSequence2TokenSequence (),
        new TokenSequenceLowercase (),
        new TokenSequence2FeatureSequence (),
        new FeatureSequence2FeatureVector ()
    };
    return new SerialPipes (stages);
}
/**
 * Creates the processing pipeline: character sequence to token sequence, lowercasing,
 * token sequence to feature sequence, and feature sequence to feature vector, in that order.
 *
 * @return a new {@code SerialPipes} wrapping the four stages
 */
public Pipe createPipe () {
    Pipe[] stages = new Pipe[] {
        new CharSequence2TokenSequence (),
        new TokenSequenceLowercase (),
        new TokenSequence2FeatureSequence (),
        new FeatureSequence2FeatureVector ()
    };
    return new SerialPipes (stages);
}
private static Pipe makePipe() { Alphabet alpha = new Alphabet(); JointInputToTokenSequence inputPipe = new JointInputToTokenSequence(alpha, new LabelAlphabet(), new LabelAlphabet()); return new SerialPipes(ImmutableList.of( inputPipe, new TokenSequenceLowercase(), // make all lowercase new NeighborTokenFeature(true, makeNeighbors()), // grab neighboring graphemes new NeighborShapeFeature(true, makeShapeNeighs()), new TokenSequenceToFeature(), // convert the strings in the text to features new TokenSequence2FeatureVectorSequence(alpha, true, true) )); }