public SequencePairAlignment generatePath () { if (isGenerative() == false) throw new IllegalStateException ("Transducer is not generative."); ArrayList initialStates = new ArrayList (); Iterator iter = initialStateIterator (); while (iter.hasNext()) { initialStates.add (iter.next()); } // xxx Not yet finished. throw new UnsupportedOperationException (); }
/** Logs a one-line summary of this transducer, then each of its states. */
public void print () {
	logger.fine ("Transducer " + this);
	printStates ();
}
/**
 * Classifies a whole sequence of instances in one call.
 *
 * @param features
 *          one feature list per instance; the outer list should form a logical
 *          sequence (e.g. tokens in a sentence or lines in a document) matching
 *          the model that was trained for this classifier
 * @return the decoded outcome for each instance, in order
 * @throws CleartkProcessingException if an outcome cannot be decoded
 */
public List<String> classify(List<List<Feature>> features) throws CleartkProcessingException {
  Pipe pipe = transducer.getInputPipe();
  Instance instance = pipe.instanceFrom(new Instance(toStrings(features), null, null, null));
  Sequence<?> inputSequence = (Sequence<?>) instance.getData();
  // transduce() returns an untyped sequence of encoded outcomes.
  Sequence<String> outcomes = ReflectionUtil.uncheckedCast(transducer.transduce(inputSequence));
  List<String> decoded = new ArrayList<String>(outcomes.size());
  for (int i = 0; i < outcomes.size(); i++) {
    decoded.add(outcomeEncoder.decode(outcomes.get(i)));
  }
  return decoded;
}
int numStates = t.numStates(); nodes = new LatticeNode[latticeLength][numStates]; boolean atLeastOneInitialState = false; for (int i = 0; i < numStates; i++) { double initialWeight = t.getState(i).getInitialWeight(); State s = t.getState(i); TransitionIterator iter = s.transitionIterator (input, ip, output, ip); if (logger.isLoggable (Level.FINE)) if (logger.isLoggable (Level.FINE)) logger.fine ("BEFORE update: destinationNode.alpha="+destinationNode.alpha); destinationNode.alpha = Transducer.sumLogProb (destinationNode.alpha, nodes[ip][i].alpha + transitionWeight); if (logger.isLoggable (Level.FINE)) logger.fine ("transitionWeight="+transitionWeight+" nodes["+ip+"]["+i+"].alpha="+nodes[ip][i].alpha totalWeight = Transducer.sumLogProb (totalWeight, (nodes[latticeLength-1][i].alpha + t.getState(i).getFinalWeight())); State s = t.getState(i); nodes[latticeLength-1][i].beta = s.getFinalWeight(); gammas[latticeLength-1][i] = nodes[latticeLength-1][i].alpha + nodes[latticeLength-1][i].beta - totalWeight; State s = t.getState(i); TransitionIterator iter = s.transitionIterator (input, ip, output, ip); while (iter.hasNext()) { double oldBeta = nodes[ip][i].beta; assert (!Double.isNaN(nodes[ip][i].beta));
/**
 * Applies a transducer to an input sequence to produce the k highest-scoring
 * output sequences.
 *
 * @param model the <code>Transducer</code>
 * @param input the input sequence
 * @param k the number of answers to return
 * @return array of the k highest-scoring output sequences
 */
public static Sequence[] apply(Transducer model, Sequence input, int k) {
	// Single-best decoding needs no explicit lattice.
	if (k == 1) {
		return new Sequence[] { model.transduce (input) };
	}
	MaxLatticeDefault lattice =
		new MaxLatticeDefault (model, input, null, cacheSizeOption.value());
	return lattice.bestOutputSequences(k).toArray(new Sequence[0]);
}
/**
 * Tags an alignment and returns the nBest highest-scoring labelings.
 * Each result's score is the labeling's lattice weight minus the total
 * lattice weight, i.e. a normalized (log-domain) score.
 */
public List<TagResult> tag(Alignment alignment, int nBest) {
	Instance instance = tduc.getInputPipe().instanceFrom(new Instance(alignment, null, null, null));
	Sequence inSeq = (Sequence) instance.getData();
	List<Sequence<Object>> best =
			tduc.getMaxLatticeFactory().newMaxLattice(tduc, inSeq).bestOutputSequences(nBest);
	// Partition weight over all labelings, used to normalize each score.
	double logZ = tduc.getSumLatticeFactory().newSumLattice(tduc, inSeq).getTotalWeight();
	ArrayList<TagResult> results = Lists.newArrayListWithCapacity(best.size());
	for (Sequence<Object> outSeq : best) {
		double logScore =
				tduc.getSumLatticeFactory().newSumLattice(tduc, inSeq, outSeq).getTotalWeight();
		results.add(makeTagResult(outSeq, logScore - logZ));
	}
	return results;
}
/** Lazily creates (on first access) and returns the lattice node at input position ip, state index si. */
public LatticeNode getLatticeNode(int ip, int si) {
	LatticeNode node = nodes[ip][si];
	if (node == null) {
		node = new LatticeNode(ip, transducer.getState(si));
		nodes[ip][si] = node;
	}
	return node;
}
// Scans forward from 'prev' for the next state whose weight exceeds
// Transducer.IMPOSSIBLE_WEIGHT, setting 'found' (and leaving 'prev' at that
// index) when one exists.
// NOTE(review): 'weights', 'found', 'prev' and 't' are fields of the
// enclosing iterator, not visible in this chunk; when 'weights' is null the
// scan is skipped entirely -- presumably hasNext()/next() then treat all
// states as candidates. Confirm against the rest of the iterator.
private void lookAhead() { if (weights != null && !found) { for (; prev < t.numStates(); prev++) if (weights[prev] > Transducer.IMPOSSIBLE_WEIGHT) { found = true; return; } } }
// Body continues beyond this chunk.
public boolean hasNext() {
private void normalizeCosts () { // Normalize the next-state costs, so they are -(log-probabilities) // This is the heart of the difference between the locally-normalized MEMM // and the globally-normalized CRF sum = Transducer.IMPOSSIBLE_WEIGHT; for (int i = 0; i < weights.length; i++) sum = sumLogProb (sum, weights[i]); assert (!Double.isNaN (sum)); if (!Double.isInfinite (sum)) { for (int i = 0; i < weights.length; i++) weights[i] -= sum; } }
/**
 * Loads a previously trained FeatureSubsetModel (model + feature configuration)
 * that was stored as a gzip-compressed serialized object.
 *
 * @param is
 *            input stream over the serialized featureSubsetModel
 * @throws IOException if the stream cannot be read or is not valid gzip
 * @throws FileNotFoundException kept in the signature for caller compatibility
 * @throws ClassNotFoundException if a serialized class is missing at runtime
 */
public void readModel(InputStream is) throws IOException, FileNotFoundException, ClassNotFoundException {
	final FeatureSubsetModel fsm;
	// try-with-resources: the original leaked both streams when readObject threw.
	try (ObjectInputStream ois = new ObjectInputStream(new GZIPInputStream(is))) {
		fsm = (FeatureSubsetModel) ois.readObject();
	}
	model = fsm.getModel();
	featureConfig = fsm.getFeatureConfig();
	trained = true;
	// Freeze the data alphabet so classification cannot grow the feature space.
	if (model instanceof MaxEnt) {
		((MaxEnt) model).getInstancePipe().getDataAlphabet().stopGrowth();
		max_ent = true;
	} else {
		((Transducer) model).getInputPipe().getDataAlphabet().stopGrowth();
	}
}
constraints[c] = 0; for (int i=requiredSegment.getStart (); i <= requiredSegment.getEnd(); i++) { int si = t.stateIndexOfString ((String)constrainedSequence.get (i)); if (si == -1) logger.warning ("Could not find state " + constrainedSequence.get (i) + ". Check that state labels match startTages and inTags, and that all labels are seen in training data."); int statei = t.stateIndexOfString (endTag); if (statei == -1) throw new IllegalArgumentException ("Could not find state " + endTag + ". Check that state labels match startTags and InTags.");
/**
 * Classifies a whole sequence of instances in one call.
 *
 * @param features
 *          one feature list per instance; the outer list should form a logical
 *          sequence (e.g. tokens in a sentence or lines in a document) matching
 *          the model that was trained for this classifier
 * @return the decoded outcome for each instance, in order
 * @throws CleartkProcessingException if an outcome cannot be decoded
 */
public List<String> classify(List<List<Feature>> features) throws CleartkProcessingException {
  Pipe pipe = transducer.getInputPipe();
  Instance instance = pipe.instanceFrom(new Instance(toStrings(features), null, null, null));
  Sequence<?> inputSequence = (Sequence<?>) instance.getData();
  // transduce() returns an untyped sequence of encoded outcomes.
  Sequence<String> outcomes = ReflectionUtil.uncheckedCast(transducer.transduce(inputSequence));
  List<String> decoded = new ArrayList<String>(outcomes.size());
  for (int i = 0; i < outcomes.size(); i++) {
    decoded.add(outcomeEncoder.decode(outcomes.get(i)));
  }
  return decoded;
}
int numStates = t.numStates(); nodes = new LatticeNode[latticeLength][numStates]; boolean atLeastOneInitialState = false; for (int i = 0; i < numStates; i++) { double initialWeight = t.getState(i).getInitialWeight(); State s = t.getState(i); TransitionIterator iter = s.transitionIterator (input, ip, output, ip); if (logger.isLoggable (Level.FINE)) if (logger.isLoggable (Level.FINE)) logger.fine ("BEFORE update: destinationNode.alpha="+destinationNode.alpha); destinationNode.alpha = Transducer.sumLogProb (destinationNode.alpha, nodes[ip][i].alpha + transitionWeight); if (logger.isLoggable (Level.FINE)) logger.fine ("transitionWeight="+transitionWeight+" nodes["+ip+"]["+i+"].alpha="+nodes[ip][i].alpha totalWeight = Transducer.sumLogProb (totalWeight, (nodes[latticeLength-1][i].alpha + t.getState(i).getFinalWeight())); State s = t.getState(i); nodes[latticeLength-1][i].beta = s.getFinalWeight(); gammas[latticeLength-1][i] = nodes[latticeLength-1][i].alpha + nodes[latticeLength-1][i].beta - totalWeight; State s = t.getState(i); TransitionIterator iter = s.transitionIterator (input, ip, output, ip); while (iter.hasNext()) { double oldBeta = nodes[ip][i].beta; assert (!Double.isNaN(nodes[ip][i].beta));
/**
 * Applies a transducer to an input sequence to produce the k highest-scoring
 * output sequences.
 *
 * @param model the <code>Transducer</code>
 * @param input the input sequence
 * @param k the number of answers to return
 * @return array of the k highest-scoring output sequences
 */
public static Sequence[] apply(Transducer model, Sequence input, int k) {
	// Single-best decoding needs no explicit lattice.
	if (k == 1) {
		return new Sequence[] { model.transduce (input) };
	}
	MaxLatticeDefault lattice =
		new MaxLatticeDefault (model, input, null, cacheSizeOption.value());
	return lattice.bestOutputSequences(k).toArray(new Sequence[0]);
}
/** Lazily creates (on first access) and returns the lattice node at input position ip, state stateIndex. */
private LatticeNode getLatticeNode (int ip, int stateIndex) {
	LatticeNode node = nodes[ip][stateIndex];
	if (node == null) {
		node = new LatticeNode (ip, t.getState (stateIndex));
		nodes[ip][stateIndex] = node;
	}
	return node;
}
// Scans forward from 'prev' for the next state whose weight exceeds
// Transducer.IMPOSSIBLE_WEIGHT, setting 'found' (and leaving 'prev' at that
// index) when one exists.
// NOTE(review): 'weights', 'found', 'prev' and 't' are fields of the
// enclosing iterator, not visible in this chunk; when 'weights' is null the
// scan is skipped entirely -- presumably hasNext()/next() then treat all
// states as candidates. Confirm against the rest of the iterator.
private void lookAhead() { if (weights != null && !found) { for (; prev < t.numStates(); prev++) if (weights[prev] > Transducer.IMPOSSIBLE_WEIGHT) { found = true; return; } } }
// Body continues beyond this chunk.
public boolean hasNext() {
private void normalizeCosts () { // Normalize the next-state costs, so they are -(log-probabilities) // This is the heart of the difference between the locally-normalized MEMM // and the globally-normalized CRF sum = Transducer.IMPOSSIBLE_WEIGHT; for (int i = 0; i < weights.length; i++) sum = sumLogProb (sum, weights[i]); assert (!Double.isNaN (sum)); if (!Double.isInfinite (sum)) { for (int i = 0; i < weights.length; i++) weights[i] -= sum; } }
/**
 * Loads a previously trained FeatureSubsetModel (model + feature configuration)
 * that was stored as a gzip-compressed serialized object.
 *
 * @param is
 *            input stream over the serialized featureSubsetModel
 * @throws IOException if the stream cannot be read or is not valid gzip
 * @throws FileNotFoundException kept in the signature for caller compatibility
 * @throws ClassNotFoundException if a serialized class is missing at runtime
 */
public void readModel(InputStream is) throws IOException, FileNotFoundException, ClassNotFoundException {
	final FeatureSubsetModel fsm;
	// try-with-resources: the original leaked both streams when readObject threw.
	try (ObjectInputStream ois = new ObjectInputStream(new GZIPInputStream(is))) {
		fsm = (FeatureSubsetModel) ois.readObject();
	}
	model = fsm.getModel();
	featureConfig = fsm.getFeatureConfig();
	trained = true;
	// Freeze the data alphabet so classification cannot grow the feature space.
	if (model instanceof MaxEnt) {
		((MaxEnt) model).getInstancePipe().getDataAlphabet().stopGrowth();
		max_ent = true;
	} else {
		((Transducer) model).getInputPipe().getDataAlphabet().stopGrowth();
	}
}
constraints[c] = 0; for (int i=requiredSegment.getStart (); i <= requiredSegment.getEnd(); i++) { int si = t.stateIndexOfString ((String)constrainedSequence.get (i)); if (si == -1) logger.warning ("Could not find state " + constrainedSequence.get (i) + ". Check that state labels match startTages and inTags, and that all labels are seen in training data."); int statei = t.stateIndexOfString (endTag); if (statei == -1) throw new IllegalArgumentException ("Could not find state " + endTag + ". Check that state labels match startTags and InTags.");