/**
 * Creates a CRF over the given pipes.
 *
 * <p>Both pipes are forwarded to the superclass constructor. The input and
 * output alphabets are both read from {@code inputPipe}: its data alphabet
 * becomes {@code inputAlphabet} and its target alphabet becomes
 * {@code outputAlphabet}. {@code outputPipe} is not consulted here.
 *
 * @param inputPipe  pipe supplying the data and target alphabets; it is
 *                   dereferenced here, so it must not be null
 * @param outputPipe pipe handed to the superclass constructor
 */
public CRF (Pipe inputPipe, Pipe outputPipe)
{
	super (inputPipe, outputPipe);
	this.inputAlphabet = inputPipe.getDataAlphabet();
	this.outputAlphabet = inputPipe.getTargetAlphabet();
	// Kept on its own line so the line comment cannot swallow the closing brace.
	//inputAlphabet.stopGrowth();
}
/**
 * Chains the iterators of every pipe in {@code pipes}, in list order.
 *
 * <p>The first pipe wraps {@code source}, and each later pipe wraps the
 * iterator produced by the pipe before it. When {@code pipes} is empty,
 * {@code source} itself is returned.
 *
 * @param source the iterator feeding the first pipe
 * @return the iterator produced by the last pipe, or {@code source} when
 *         there are no pipes
 */
public Iterator<Instance> newIteratorFrom (Iterator<Instance> source)
{
	Iterator<Instance> chained = source;
	// Starting at index 0 with the untouched source also covers the empty list.
	for (int stage = 0; stage < pipes.size(); stage++) {
		chained = pipes.get(stage).newIteratorFrom(chained);
	}
	return chained;
}
/**
 * Creates a pipe that simply carries the two given alphabets.
 *
 * <p>Both alphabets are installed through the superclass setters, called
 * explicitly via {@code super}.
 *
 * @param data  alphabet passed to {@code setDataAlphabet}
 * @param label alphabet passed to {@code setTargetAlphabet}
 */
public METrainerDummyPipe(final Alphabet data, final Alphabet label)
{
	super.setDataAlphabet(data);
	super.setTargetAlphabet(label);
}
/**
 * Walks {@code pipes} in order, telling each pipe which alphabets the
 * pipe before it exposes, then records the alphabets of the last pipe.
 *
 * <p>The first pipe is notified with {@code null} for both alphabets.
 * After the loop, {@code dataAlphabet} and {@code targetAlphabet} hold
 * whatever the final pipe returned (or {@code null} when there are no pipes).
 */
private void resolveAlphabets ()
{
	Alphabet previousData = null;
	Alphabet previousTarget = null;

	for (Pipe current : pipes) {
		// Notify first, then read: a pipe may settle on its alphabet during the notification.
		current.preceedingPipeDataAlphabetNotification(previousData);
		previousData = current.getDataAlphabet();

		current.preceedingPipeTargetAlphabetNotification(previousTarget);
		previousTarget = current.getTargetAlphabet();
	}

	dataAlphabet = previousData;
	targetAlphabet = previousTarget;
}
/** Take input sequence from instance.data and put the output sequence in instance.data. */ public Instance transduce (Instance instance) { if (inputPipe != null) instance = inputPipe.instanceFrom(instance); // TODO Use MaxLatticeFactory instead of hardcoding instance.setData(new MaxLatticeDefault(this, (Sequence)instance.getData()).bestOutputSequence()); if (outputPipe != null) instance = outputPipe.instanceFrom(instance); return instance; }
p.getTargetAlphabet().lookupIndex(defaultOption.value); p.setTargetProcessing(true); trainingData = new InstanceList(p); trainingData.addThruPipe(new LineGroupIterator(trainingFile, Pattern.compile("^\\s*$"), true)); logger.info("Number of features in training data: "+p.getDataAlphabet().size()); p.setTargetProcessing(true); testData = new InstanceList(p); testData.addThruPipe(new LineGroupIterator(testFile, p.setTargetProcessing(false); testData = new InstanceList(p); testData.addThruPipe( Pattern.compile("^\\s*$"), true)); logger.info ("Number of predicates: "+p.getDataAlphabet().size()); if (p.isTargetProcessing()) { Alphabet targets = p.getTargetAlphabet(); StringBuffer buf = new StringBuffer("Labels:"); for (int i = 0; i < targets.size(); i++)
public void testConcatenatePipes () { Pipe p1 = new StupidPipe (); Pipe p2 = new SimpleTagger.SimpleTaggerSentence2FeatureVectorSequence (); // initialize p2's dict p2.instanceFrom(new Instance (data, null, null, null)); assertEquals (3, p2.getDataAlphabet ().size()); Pipe serial = PipeUtils.concatenatePipes (p1, p2); Alphabet dict = serial.getDataAlphabet (); assertEquals (3, dict.size ()); assertTrue (dict == p2.getDataAlphabet ()); }
/**
 * Runs every input file through the given pipes and stores the resulting
 * instances, followed by the pipes' alphabets, in the output database.
 *
 * <p>Each file named in {@code inputFiles.value} is read with a
 * {@code CsvIterator} using a three-field, tab-separated pattern. The
 * file reader is closed as soon as its instances have been saved, even
 * when saving fails.
 *
 * @param pipes the pipes to combine into one {@code SerialPipes}
 * @throws Exception if a file cannot be opened or closed, or if the store fails
 */
public static void writeInstanceList(ArrayList<Pipe> pipes) throws Exception {
	Pipe serialPipe = new SerialPipes(pipes);
	DBInstanceStore saver = new DBInstanceStore(outputDatabase.value);

	for (String filename: inputFiles.value) {
		logger.info("importing from " + filename);

		FileReader fileReader = new FileReader(filename);
		try {
			// NOTE(review): 3, 2, 1 are presumably the data/target/name group indices -- confirm against CsvIterator.
			CsvIterator reader = new CsvIterator(fileReader, "(.*?)\\t(.*?)\\t(.*)", 3, 2, 1);
			saver.saveInstances(serialPipe.newIteratorFrom(reader));
		}
		finally {
			// Release the file handle per file instead of leaking one reader per input.
			// NOTE(review): assumes saveInstances has consumed the iterator by the time it returns.
			fileReader.close();
		}
	}

	saver.saveAlphabets(serialPipe.getDataAlphabet(), serialPipe.getTargetAlphabet());
	saver.cleanup();
}
logger.info("Testing vectors loaded from " + testFile.value); if (!testFileIlist.getPipe().alphabetsMatch(trainingFileIlist.getPipe())) { throw new RuntimeException( trainingFileIlist.getPipe().getDataAlphabet() + "\n" + testFileIlist.getPipe().getDataAlphabet() + "\n" + trainingFileIlist.getPipe().getTargetAlphabet() + "\n" + testFileIlist.getPipe().getTargetAlphabet() + "\n" + "Training and testing alphabets don't match!\n"); validationFileIlist = InstanceList.load (new File(validationFile.value)); logger.info("validation vectors loaded from " + validationFile.value); if (!validationFileIlist.getPipe().alphabetsMatch(trainingFileIlist.getPipe())) { throw new RuntimeException( trainingFileIlist.getPipe().getDataAlphabet() + "\n" + validationFileIlist.getPipe().getDataAlphabet() + "\n" + trainingFileIlist.getPipe().getTargetAlphabet() + "\n" + validationFileIlist.getPipe().getTargetAlphabet() + "\n" + "Training and validation alphabets don't match!\n");
/**
 * Sets the target-processing flag on this pipe and on every pipe in {@code pipes}.
 *
 * @param lookForAndProcessTarget the flag value forwarded to the superclass
 *                                and to each contained pipe
 */
public void setTargetProcessing (boolean lookForAndProcessTarget)
{
	super.setTargetProcessing (lookForAndProcessTarget);

	for (Pipe member : pipes) {
		member.setTargetProcessing (lookForAndProcessTarget);
	}
}
/**
 * Returns the <code>Alphabet</code> mapping features of the data to
 * integers.
 *
 * <p>When no data alphabet has been recorded yet and a pipe is present,
 * the pipe's data alphabet is adopted and cached first.
 *
 * @return the cached data alphabet; may be {@code null}
 */
public Alphabet getDataAlphabet ()
{
	boolean canAdoptFromPipe = (pipe != null) && (dataAlphabet == null);
	if (canAdoptFromPipe) {
		dataAlphabet = pipe.getDataAlphabet ();
	}

	// Consistency check, active only with assertions enabled:
	// a pipe that has a data alphabet must share this list's instance.
	assert (pipe == null
			|| pipe.getDataAlphabet () == null
			|| pipe.getDataAlphabet () == dataAlphabet);

	return dataAlphabet;
}
/**
 * Returns the <code>Alphabet</code> mapping target output labels to
 * integers.
 *
 * <p>When no target alphabet has been recorded yet and a pipe is present,
 * the pipe's target alphabet is adopted and cached first.
 *
 * @return the cached target alphabet; may be {@code null}
 */
public Alphabet getTargetAlphabet ()
{
	boolean canAdoptFromPipe = (pipe != null) && (targetAlphabet == null);
	if (canAdoptFromPipe) {
		targetAlphabet = pipe.getTargetAlphabet ();
	}

	// Consistency check, active only with assertions enabled:
	// a pipe that has a target alphabet must share this list's instance.
	assert (pipe == null
			|| pipe.getTargetAlphabet () == null
			|| pipe.getTargetAlphabet () == targetAlphabet);

	return targetAlphabet;
}
/** Take input sequence from instance.data and put the output sequence in instance.data. */ public Instance transduce (Instance instance) { if (inputPipe != null) instance = inputPipe.instanceFrom(instance); // TODO Use MaxLatticeFactory instead of hardcoding instance.setData(new MaxLatticeDefault(this, (Sequence)instance.getData()).bestOutputSequence()); if (outputPipe != null) instance = outputPipe.instanceFrom(instance); return instance; }
p.getTargetAlphabet().lookupIndex(defaultOption.value); p.setTargetProcessing(true); trainingData = new InstanceList(p); trainingData.addThruPipe(new LineGroupIterator(trainingFile, Pattern.compile("^\\s*$"), true)); logger.info("Number of features in training data: "+p.getDataAlphabet().size()); p.setTargetProcessing(true); testData = new InstanceList(p); testData.addThruPipe(new LineGroupIterator(testFile, p.setTargetProcessing(false); testData = new InstanceList(p); testData.addThruPipe( Pattern.compile("^\\s*$"), true)); logger.info ("Number of predicates: "+p.getDataAlphabet().size()); if (p.isTargetProcessing()) { Alphabet targets = p.getTargetAlphabet(); StringBuffer buf = new StringBuffer("Labels:"); for (int i = 0; i < targets.size(); i++)
public void testConcatenatePipes () { Pipe p1 = new StupidPipe (); Pipe p2 = new SimpleTagger.SimpleTaggerSentence2FeatureVectorSequence (); // initialize p2's dict p2.instanceFrom(new Instance (data, null, null, null)); assertEquals (3, p2.getDataAlphabet ().size()); Pipe serial = PipeUtils.concatenatePipes (p1, p2); Alphabet dict = serial.getDataAlphabet (); assertEquals (3, dict.size ()); assertTrue (dict == p2.getDataAlphabet ()); }
/**
 * Runs every input file through the given pipes and stores the resulting
 * instances, followed by the pipes' alphabets, in the output database.
 *
 * <p>Each file named in {@code inputFiles.value} is read with a
 * {@code CsvIterator} using a three-field, tab-separated pattern. The
 * file reader is closed as soon as its instances have been saved, even
 * when saving fails.
 *
 * @param pipes the pipes to combine into one {@code SerialPipes}
 * @throws Exception if a file cannot be opened or closed, or if the store fails
 */
public static void writeInstanceList(ArrayList<Pipe> pipes) throws Exception {
	Pipe serialPipe = new SerialPipes(pipes);
	DBInstanceStore saver = new DBInstanceStore(outputDatabase.value);

	for (String filename: inputFiles.value) {
		logger.info("importing from " + filename);

		FileReader fileReader = new FileReader(filename);
		try {
			// NOTE(review): 3, 2, 1 are presumably the data/target/name group indices -- confirm against CsvIterator.
			CsvIterator reader = new CsvIterator(fileReader, "(.*?)\\t(.*?)\\t(.*)", 3, 2, 1);
			saver.saveInstances(serialPipe.newIteratorFrom(reader));
		}
		finally {
			// Release the file handle per file instead of leaking one reader per input.
			// NOTE(review): assumes saveInstances has consumed the iterator by the time it returns.
			fileReader.close();
		}
	}

	saver.saveAlphabets(serialPipe.getDataAlphabet(), serialPipe.getTargetAlphabet());
	saver.cleanup();
}
/**
 * Walks {@code pipes} in order, telling each pipe which alphabets the
 * pipe before it exposes, then records the alphabets of the last pipe.
 *
 * <p>The first pipe is notified with {@code null} for both alphabets.
 * After the loop, {@code dataAlphabet} and {@code targetAlphabet} hold
 * whatever the final pipe returned (or {@code null} when there are no pipes).
 */
private void resolveAlphabets ()
{
	Alphabet previousData = null;
	Alphabet previousTarget = null;

	for (Pipe current : pipes) {
		// Notify first, then read: a pipe may settle on its alphabet during the notification.
		current.preceedingPipeDataAlphabetNotification(previousData);
		previousData = current.getDataAlphabet();

		current.preceedingPipeTargetAlphabetNotification(previousTarget);
		previousTarget = current.getTargetAlphabet();
	}

	dataAlphabet = previousData;
	targetAlphabet = previousTarget;
}
logger.info("Testing vectors loaded from " + testFile.value); if (!testFileIlist.getPipe().alphabetsMatch(trainingFileIlist.getPipe())) { throw new RuntimeException( trainingFileIlist.getPipe().getDataAlphabet() + "\n" + testFileIlist.getPipe().getDataAlphabet() + "\n" + trainingFileIlist.getPipe().getTargetAlphabet() + "\n" + testFileIlist.getPipe().getTargetAlphabet() + "\n" + "Training and testing alphabets don't match!\n"); validationFileIlist = InstanceList.load (new File(validationFile.value)); logger.info("validation vectors loaded from " + validationFile.value); if (!validationFileIlist.getPipe().alphabetsMatch(trainingFileIlist.getPipe())) { throw new RuntimeException( trainingFileIlist.getPipe().getDataAlphabet() + "\n" + validationFileIlist.getPipe().getDataAlphabet() + "\n" + trainingFileIlist.getPipe().getTargetAlphabet() + "\n" + validationFileIlist.getPipe().getTargetAlphabet() + "\n" + "Training and validation alphabets don't match!\n");
/**
 * Sets the target-processing flag on this pipe and on every pipe in {@code pipes}.
 *
 * @param lookForAndProcessTarget the flag value forwarded to the superclass
 *                                and to each contained pipe
 */
public void setTargetProcessing (boolean lookForAndProcessTarget)
{
	super.setTargetProcessing (lookForAndProcessTarget);

	for (Pipe member : pipes) {
		member.setTargetProcessing (lookForAndProcessTarget);
	}
}
/**
 * Reads a gzip-compressed, Java-serialized {@code CRF} from the stream and
 * installs it as the current model.
 *
 * <p>On success {@code model} is replaced, {@code trained} is set to
 * {@code true}, and growth of the model's input-pipe data alphabet is
 * stopped. The stream is closed in all cases, including when
 * deserialization fails.
 *
 * <p>SECURITY NOTE(review): this uses native Java deserialization
 * ({@code ObjectInputStream.readObject}); only feed it streams from a
 * trusted source.
 *
 * @param is stream holding the gzip-compressed serialized model; closed by this method
 * @throws IOException            if the stream is not valid gzip data or cannot be read
 * @throws ClassNotFoundException if a serialized class cannot be resolved
 */
public void readModel(InputStream is) throws IOException, ClassNotFoundException {
    final GZIPInputStream gin = new GZIPInputStream(is);
    ObjectInputStream ois = null;
    try {
        ois = new ObjectInputStream(gin);
        model = (CRF) ois.readObject();
        trained = true;
        model.getInputPipe().getDataAlphabet().stopGrowth();
    } finally {
        // Close the outermost stream that was actually created; this also
        // closes the wrapped streams, so nothing leaks on a failed read.
        if (ois != null) {
            ois.close();
        } else {
            gin.close();
        }
    }
}