private DFSA<String, Integer> getViterbiSearchGraph(List<IN> doc, Class<? extends CoreAnnotation<String>> answerField) { if (doc.isEmpty()) { return new DFSA<>(null); } // TODO get rid of ObjectBankWrapper ObjectBankWrapper<IN> obw = new ObjectBankWrapper<>(flags, null, knownLCWords); doc = obw.processDocument(doc); SequenceModel model = getSequenceModel(doc); return ViterbiSearchGraphBuilder.getGraph(model, classIndex); }
private List<IN> preprocessTokens(List<? extends HasWord> tokenSequence) { // log.info("knownLCWords.size is " + knownLCWords.size() + "; knownLCWords.maxSize is " + knownLCWords.getMaxSize() + // ", prior to NER for " + getClass().toString()); List<IN> document = new ArrayList<>(); int i = 0; for (HasWord word : tokenSequence) { IN wi; // initialized below if (word instanceof CoreMap) { // copy all annotations! some are required later in // AbstractSequenceClassifier.classifyWithInlineXML // wi = (IN) new ArrayCoreMap((ArrayCoreMap) word); wi = tokenFactory.makeToken((IN) word); } else { wi = tokenFactory.makeToken(); wi.set(CoreAnnotations.TextAnnotation.class, word.word()); // wi.setWord(word.word()); } wi.set(CoreAnnotations.PositionAnnotation.class, Integer.toString(i)); wi.set(CoreAnnotations.AnswerAnnotation.class, backgroundSymbol()); document.add(wi); i++; } // TODO get rid of ObjectBankWrapper ObjectBankWrapper<IN> wrapper = new ObjectBankWrapper<>(flags, null, knownLCWords); wrapper.processDocument(document); // log.info("Size of knownLCWords is " + knownLCWords.size() + ", after NER for " + getClass().toString()); return document; }
// NOTE(review): orphan statement outside any visible method — appears to be a duplicated
// fragment of getViterbiSearchGraph. Reassigns doc to the result of ObjectBankWrapper
// preprocessing (presumably feature/word-shape normalization — TODO confirm).
doc = obw.processDocument(doc);
private DFSA<String, Integer> getViterbiSearchGraph(List<IN> doc, Class<? extends CoreAnnotation<String>> answerField) { if (doc.isEmpty()) { return new DFSA<>(null); } // TODO get rid of ObjectBankWrapper ObjectBankWrapper<IN> obw = new ObjectBankWrapper<>(flags, null, knownLCWords); doc = obw.processDocument(doc); SequenceModel model = getSequenceModel(doc); return ViterbiSearchGraphBuilder.getGraph(model, classIndex); }
public DFSA<String, Integer> getViterbiSearchGraph(List<IN> doc, Class<? extends CoreAnnotation<String>> answerField) { if (doc.isEmpty()) { return new DFSA<String, Integer>(null); } // TODO get rid of objectbankwrapper ObjectBankWrapper<IN> obw = new ObjectBankWrapper<IN>(flags, null, knownLCWords); doc = obw.processDocument(doc); SequenceModel model = getSequenceModel(doc); return ViterbiSearchGraphBuilder.getGraph(model, classIndex); }
public DFSA<String, Integer> getViterbiSearchGraph(List<IN> doc, Class<? extends CoreAnnotation<String>> answerField) { if (doc.isEmpty()) { return new DFSA<String, Integer>(null); } // TODO get rid of objectbankwrapper ObjectBankWrapper<IN> obw = new ObjectBankWrapper<IN>(flags, null, knownLCWords); doc = obw.processDocument(doc); SequenceModel model = getSequenceModel(doc); return ViterbiSearchGraphBuilder.getGraph(model, classIndex); }
private List<IN> preprocessTokens(List<? extends HasWord> tokenSequence) { // log.info("knownLCWords.size is " + knownLCWords.size() + "; knownLCWords.maxSize is " + knownLCWords.getMaxSize() + // ", prior to NER for " + getClass().toString()); List<IN> document = new ArrayList<>(); int i = 0; for (HasWord word : tokenSequence) { IN wi; // initialized below if (word instanceof CoreMap) { // copy all annotations! some are required later in // AbstractSequenceClassifier.classifyWithInlineXML // wi = (IN) new ArrayCoreMap((ArrayCoreMap) word); wi = tokenFactory.makeToken((IN) word); } else { wi = tokenFactory.makeToken(); wi.set(CoreAnnotations.TextAnnotation.class, word.word()); // wi.setWord(word.word()); } wi.set(CoreAnnotations.PositionAnnotation.class, Integer.toString(i)); wi.set(CoreAnnotations.AnswerAnnotation.class, backgroundSymbol()); document.add(wi); i++; } // TODO get rid of ObjectBankWrapper ObjectBankWrapper<IN> wrapper = new ObjectBankWrapper<>(flags, null, knownLCWords); wrapper.processDocument(document); // log.info("Size of knownLCWords is " + knownLCWords.size() + ", after NER for " + getClass().toString()); return document; }
// NOTE(review): orphan statement outside any visible method — duplicated fragment of
// preprocessTokens. Runs ObjectBankWrapper preprocessing over document; return value
// is deliberately ignored (the list is presumably mutated in place — TODO confirm).
wrapper.processDocument(document);
// NOTE(review): duplicated orphan fragment (see preprocessTokens). Applies
// ObjectBankWrapper preprocessing; result discarded, so in-place mutation is assumed —
// TODO confirm against ObjectBankWrapper.processDocument.
wrapper.processDocument(document);
// NOTE(review): duplicated orphan fragment of preprocessTokens' preprocessing step.
// Ignoring the return value only makes sense if processDocument mutates document
// in place — TODO confirm.
wrapper.processDocument(document);
// NOTE(review): duplicated orphan fragment; same preprocessing call as in
// preprocessTokens, with the returned list discarded (presumed in-place mutation —
// TODO confirm).
wrapper.processDocument(document);
public Counter<List<IN>> classifyKBest(List<IN> doc, Class<? extends CoreAnnotation<String>> answerField, int k) { if (doc.isEmpty()) { return new ClassicCounter<List<IN>>(); } // TODO get rid of ObjectBankWrapper // i'm sorry that this is so hideous - JRF ObjectBankWrapper<IN> obw = new ObjectBankWrapper<IN>(flags, null, knownLCWords); doc = obw.processDocument(doc); SequenceModel model = getSequenceModel(doc); KBestSequenceFinder tagInference = new KBestSequenceFinder(); Counter<int[]> bestSequences = tagInference.kBestSequences(model, k); Counter<List<IN>> kBest = new ClassicCounter<List<IN>>(); for (int[] seq : bestSequences.keySet()) { List<IN> kth = new ArrayList<IN>(); int pos = model.leftWindow(); for (IN fi : doc) { IN newFL = tokenFactory.makeToken(fi); String guess = classIndex.get(seq[pos]); fi.remove(AnswerAnnotation.class); // because fake answers will get // added during testing newFL.set(answerField, guess); pos++; kth.add(newFL); } kBest.setCount(kth, bestSequences.getCount(seq)); } return kBest; }
// NOTE(review): orphan duplicated fragment of getViterbiSearchGraph/classifyKBest.
// Replaces doc with the ObjectBankWrapper-processed document.
doc = obw.processDocument(doc);
// NOTE(review): orphan duplicated fragment; reassigns doc to the preprocessed copy
// produced by ObjectBankWrapper (exact normalization performed is not visible here —
// TODO confirm).
doc = obw.processDocument(doc);