/**
 * Draws one labelled copy of the input sequence.
 *
 * <p>Asks {@code sampler} for its best label sequence under {@code model}, then,
 * for each token of {@code input} in order, obtains a token from
 * {@code tokenFactory.makeToken(token)} and stores on it, as
 * {@code AnswerAnnotation}, the {@code classIndex} entry for the label id at the
 * same position.
 *
 * @return the list of newly made tokens, in input order
 */
@Override
public List<IN> drawSample() {
  final int[] labelIds = sampler.bestSequence(model);
  final List<IN> labelled = new ArrayList<>();
  int position = 0;
  for (IN original : input) {
    IN copy = tokenFactory.makeToken(original);
    copy.set(CoreAnnotations.AnswerAnnotation.class, classIndex.get(labelIds[position]));
    labelled.add(copy);
    position++;
  }
  return labelled;
}
};
private List<IN> preprocessTokens(List<? extends HasWord> tokenSequence) { // log.info("knownLCWords.size is " + knownLCWords.size() + "; knownLCWords.maxSize is " + knownLCWords.getMaxSize() + // ", prior to NER for " + getClass().toString()); List<IN> document = new ArrayList<>(); int i = 0; for (HasWord word : tokenSequence) { IN wi; // initialized below if (word instanceof CoreMap) { // copy all annotations! some are required later in // AbstractSequenceClassifier.classifyWithInlineXML // wi = (IN) new ArrayCoreMap((ArrayCoreMap) word); wi = tokenFactory.makeToken((IN) word); } else { wi = tokenFactory.makeToken(); wi.set(CoreAnnotations.TextAnnotation.class, word.word()); // wi.setWord(word.word()); } wi.set(CoreAnnotations.PositionAnnotation.class, Integer.toString(i)); wi.set(CoreAnnotations.AnswerAnnotation.class, backgroundSymbol()); document.add(wi); i++; } // TODO get rid of ObjectBankWrapper ObjectBankWrapper<IN> wrapper = new ObjectBankWrapper<>(flags, null, knownLCWords); wrapper.processDocument(document); // log.info("Size of knownLCWords is " + knownLCWords.size() + ", after NER for " + getClass().toString()); return document; }
/**
 * Creates a token for the slice {@code text.substring(start, end)}.
 *
 * <p>The token's {@code TextAnnotation} is the slice; when the token is a
 * {@code CoreLabel} its {@code ValueAnnotation} is set to the slice as well.
 * The character offsets stored on the token are {@code start} and {@code end}
 * shifted by the {@code CharacterOffsetBeginAnnotation} of {@code cm}
 * (treated as 0 when {@code cm} has none).
 *
 * @param cm      map supplying the base character offset
 * @param text    text the slice is taken from
 * @param start   inclusive start index into {@code text}
 * @param end     exclusive end index into {@code text}
 * @param factory factory used to make the token
 * @param <T>     type of token produced
 * @return the new token, or {@code null} when {@code end <= start}
 */
public static <T extends CoreMap> T createCoreMap(CoreMap cm, String text, int start, int end,
                                                  CoreTokenFactory<T> factory) {
  if (end <= start) {
    return null;
  }

  T span = factory.makeToken();
  Integer declaredBase = cm.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
  int base = (declaredBase == null) ? 0 : declaredBase;

  String covered = text.substring(start, end);
  span.set(CoreAnnotations.TextAnnotation.class, covered);
  if (span instanceof CoreLabel) {
    span.set(CoreAnnotations.ValueAnnotation.class, covered);
  }
  span.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, base + start);
  span.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, base + end);
  return span;
}
/**
 * Sets up the feature factories, the token factory and the derived fields
 * from the given flags, then calls {@code reinit()}.
 *
 * @param flags A specification of the AbstractSequenceClassifier to construct.
 */
public AbstractSequenceClassifier(SeqClassifierFlags flags) {
  this.flags = flags;

  // Thang Sep13: allow for multiple feature factories.
  this.featureFactories = Generics.newArrayList();

  // Single-factory flag, still honoured for compatibility.
  if (flags.featureFactory != null) {
    FeatureFactory<IN> singleFactory =
        new MetaClass(flags.featureFactory).createInstance(flags.featureFactoryArgs);
    this.featureFactories.add(singleFactory);
  }

  // Multi-factory flag: the i-th class is built with the i-th argument entry.
  if (flags.featureFactories != null) {
    for (int idx = 0; idx < flags.featureFactories.length; idx++) {
      FeatureFactory<IN> extraFactory =
          new MetaClass(flags.featureFactories[idx]).createInstance(flags.featureFactoriesArgs.get(idx));
      this.featureFactories.add(extraFactory);
    }
  }

  if (flags.tokenFactory != null) {
    this.tokenFactory = new MetaClass(flags.tokenFactory).createInstance(flags.tokenFactoryArgs);
  } else {
    // No token factory configured: fall back to CoreLabelTokenFactory.
    this.tokenFactory = (CoreTokenFactory<IN>) new CoreLabelTokenFactory();
  }

  this.pad = this.tokenFactory.makeToken();
  this.windowSize = flags.maxLeft + 1;
  reinit();
}
int pos = model.leftWindow(); for (IN fi : doc) { IN newFL = tokenFactory.makeToken(fi); String guess = classIndex.get(seq[pos]); fi.remove(CoreAnnotations.AnswerAnnotation.class); // because fake answers will get
/**
 * Initialize the featureFactory, the tokenFactory and other variables based
 * on the passed in flags.
 *
 * <p>The classes named by {@code flags.featureFactory} and
 * {@code flags.tokenFactory} are loaded reflectively and instantiated through
 * their no-argument constructors. When {@code flags.tokenFactory} is
 * {@code null}, a {@code CoreLabelTokenFactory} is used instead.
 *
 * @param flags
 *          A specification of the AbstractSequenceClassifier to construct.
 * @throws RuntimeException
 *           if either factory cannot be loaded, instantiated or cast; the
 *           underlying failure is preserved as the cause
 */
public AbstractSequenceClassifier(SeqClassifierFlags flags) {
  this.flags = flags;
  try {
    // Class.newInstance() is deprecated because it rethrows whatever the
    // constructor throws, checked or not, bypassing compile-time checking.
    // Going through the Constructor object reports such failures as
    // InvocationTargetException, which is wrapped below like any other error.
    this.featureFactory =
        (FeatureFactory) Class.forName(flags.featureFactory).getDeclaredConstructor().newInstance();
    if (flags.tokenFactory == null) {
      tokenFactory = (CoreTokenFactory<IN>) new CoreLabelTokenFactory();
    } else {
      this.tokenFactory =
          (CoreTokenFactory<IN>) Class.forName(flags.tokenFactory).getDeclaredConstructor().newInstance();
    }
  } catch (Exception e) {
    // Name the configured classes so a misconfiguration is easy to diagnose;
    // the original exception stays attached as the cause.
    throw new RuntimeException("Could not instantiate featureFactory '" + flags.featureFactory
        + "' or tokenFactory '" + flags.tokenFactory + "'", e);
  }
  pad = tokenFactory.makeToken();
  windowSize = flags.maxLeft + 1;
  reinit();
}
/**
 * Draws one labelled copy of the input sequence.
 *
 * <p>Asks {@code sampler} for its best label sequence under {@code model}, then,
 * for each token of {@code input} in order, obtains a token from
 * {@code tokenFactory.makeToken(token)} and stores on it, as
 * {@code AnswerAnnotation}, the {@code classIndex} entry for the label id at the
 * same position.
 *
 * @return the list of newly made tokens, in input order
 */
@Override
public List<IN> drawSample() {
  int[] sampleArray = sampler.bestSequence(model);
  List<IN> sample = new ArrayList<IN>();
  int i = 0;
  for (IN word : input) {
    IN newWord = tokenFactory.makeToken(word);
    newWord.set(AnswerAnnotation.class, classIndex.get(sampleArray[i]));
    sample.add(newWord);
    i++;
  }
  return sample;
}
};
/**
 * Produces a labelled version of the input using the sampler's best sequence.
 *
 * <p>The label ids come from {@code sampler.bestSequence(model)}. Each input
 * token, in order, is passed to {@code tokenFactory.makeToken}; the resulting
 * token gets the {@code classIndex} entry for its position's label id as
 * {@code AnswerAnnotation} and is appended to the result.
 *
 * @return the list of newly made tokens, in input order
 */
@Override
public List<IN> drawSample() {
  final int[] bestLabels = sampler.bestSequence(model);
  final List<IN> result = new ArrayList<>();
  int next = 0;
  for (IN token : input) {
    IN labelledToken = tokenFactory.makeToken(token);
    labelledToken.set(CoreAnnotations.AnswerAnnotation.class, classIndex.get(bestLabels[next]));
    result.add(labelledToken);
    next += 1;
  }
  return result;
}
};
/**
 * Produces a labelled version of the input using the sampler's best sequence.
 *
 * <p>The label ids come from {@code sampler.bestSequence(model)}. Each input
 * token, in order, is passed to {@code tokenFactory.makeToken}; the resulting
 * token gets the {@code classIndex} entry for its position's label id as
 * {@code AnswerAnnotation} and is appended to the result.
 *
 * @return the list of newly made tokens, in input order
 */
@Override
public List<IN> drawSample() {
  final int[] bestLabels = sampler.bestSequence(model);
  final List<IN> result = new ArrayList<IN>();
  int next = 0;
  for (IN token : input) {
    IN labelledToken = tokenFactory.makeToken(token);
    labelledToken.set(CoreAnnotations.AnswerAnnotation.class, classIndex.get(bestLabels[next]));
    result.add(labelledToken);
    next += 1;
  }
  return result;
}
};
/**
 * Creates a token for the slice {@code text.substring(start, end)}.
 *
 * <p>The token's {@code TextAnnotation} is the slice, and its character
 * offsets are {@code start} and {@code end} shifted by the
 * {@code CharacterOffsetBeginAnnotation} of {@code cm} (treated as 0 when
 * {@code cm} has none).
 *
 * @param cm      map supplying the base character offset
 * @param text    text the slice is taken from
 * @param start   inclusive start index into {@code text}
 * @param end     exclusive end index into {@code text}
 * @param factory factory used to make the token
 * @param <T>     type of token produced
 * @return the new token, or {@code null} when {@code end <= start}
 */
public static <T extends CoreMap> T createCoreMap(CoreMap cm, String text, int start, int end,
                                                  CoreTokenFactory<T> factory) {
  if (end <= start) {
    return null;
  }

  T span = factory.makeToken();
  Integer declaredBase = cm.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
  int base = (declaredBase == null) ? 0 : declaredBase;

  span.set(CoreAnnotations.TextAnnotation.class, text.substring(start, end));
  span.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, base + start);
  span.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, base + end);
  return span;
}
private List<IN> preprocessTokens(List<? extends HasWord> tokenSequence) { // log.info("knownLCWords.size is " + knownLCWords.size() + "; knownLCWords.maxSize is " + knownLCWords.getMaxSize() + // ", prior to NER for " + getClass().toString()); List<IN> document = new ArrayList<>(); int i = 0; for (HasWord word : tokenSequence) { IN wi; // initialized below if (word instanceof CoreMap) { // copy all annotations! some are required later in // AbstractSequenceClassifier.classifyWithInlineXML // wi = (IN) new ArrayCoreMap((ArrayCoreMap) word); wi = tokenFactory.makeToken((IN) word); } else { wi = tokenFactory.makeToken(); wi.set(CoreAnnotations.TextAnnotation.class, word.word()); // wi.setWord(word.word()); } wi.set(CoreAnnotations.PositionAnnotation.class, Integer.toString(i)); wi.set(CoreAnnotations.AnswerAnnotation.class, backgroundSymbol()); document.add(wi); i++; } // TODO get rid of ObjectBankWrapper ObjectBankWrapper<IN> wrapper = new ObjectBankWrapper<>(flags, null, knownLCWords); wrapper.processDocument(document); // log.info("Size of knownLCWords is " + knownLCWords.size() + ", after NER for " + getClass().toString()); return document; }
/**
 * Creates a token for the slice {@code text.substring(start, end)}.
 *
 * <p>The token's {@code TextAnnotation} is the slice; when the token is a
 * {@code CoreLabel} its {@code ValueAnnotation} is set to the slice as well.
 * The character offsets stored on the token are {@code start} and {@code end}
 * shifted by the {@code CharacterOffsetBeginAnnotation} of {@code cm}
 * (treated as 0 when {@code cm} has none).
 *
 * @param cm      map supplying the base character offset
 * @param text    text the slice is taken from
 * @param start   inclusive start index into {@code text}
 * @param end     exclusive end index into {@code text}
 * @param factory factory used to make the token
 * @param <T>     type of token produced
 * @return the new token, or {@code null} when {@code end <= start}
 */
public static <T extends CoreMap> T createCoreMap(CoreMap cm, String text, int start, int end,
                                                  CoreTokenFactory<T> factory) {
  if (end <= start) {
    return null;
  }

  T span = factory.makeToken();
  Integer declaredBase = cm.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
  int base = (declaredBase == null) ? 0 : declaredBase;

  String covered = text.substring(start, end);
  span.set(CoreAnnotations.TextAnnotation.class, covered);
  if (span instanceof CoreLabel) {
    span.set(CoreAnnotations.ValueAnnotation.class, covered);
  }
  span.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, base + start);
  span.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, base + end);
  return span;
}
/**
 * Creates a token for the slice {@code text.substring(start, end)}.
 *
 * <p>The token's {@code TextAnnotation} is the slice; when the token is a
 * {@code CoreLabel} its {@code ValueAnnotation} is set to the slice as well.
 * The character offsets stored on the token are {@code start} and {@code end}
 * shifted by the {@code CharacterOffsetBeginAnnotation} of {@code cm}
 * (treated as 0 when {@code cm} has none).
 *
 * @param cm      map supplying the base character offset
 * @param text    text the slice is taken from
 * @param start   inclusive start index into {@code text}
 * @param end     exclusive end index into {@code text}
 * @param factory factory used to make the token
 * @param <T>     type of token produced
 * @return the new token, or {@code null} when {@code end <= start}
 */
public static <T extends CoreMap> T createCoreMap(CoreMap cm, String text, int start, int end,
                                                  CoreTokenFactory<T> factory) {
  if (end <= start) {
    return null;
  }

  T span = factory.makeToken();
  Integer declaredBase = cm.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
  int base = (declaredBase == null) ? 0 : declaredBase;

  String covered = text.substring(start, end);
  span.set(CoreAnnotations.TextAnnotation.class, covered);
  if (span instanceof CoreLabel) {
    span.set(CoreAnnotations.ValueAnnotation.class, covered);
  }
  span.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, base + start);
  span.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, base + end);
  return span;
}
/**
 * Sets up the feature factories, the token factory and the derived fields
 * from the given flags, then calls {@code reinit()}.
 *
 * @param flags A specification of the AbstractSequenceClassifier to construct.
 */
public AbstractSequenceClassifier(SeqClassifierFlags flags) {
  this.flags = flags;

  // Thang Sep13: allow for multiple feature factories.
  this.featureFactories = Generics.newArrayList();

  // Single-factory flag, still honoured for compatibility.
  if (flags.featureFactory != null) {
    FeatureFactory<IN> singleFactory =
        new MetaClass(flags.featureFactory).createInstance(flags.featureFactoryArgs);
    this.featureFactories.add(singleFactory);
  }

  // Multi-factory flag: the i-th class is built with the i-th argument entry.
  if (flags.featureFactories != null) {
    for (int idx = 0; idx < flags.featureFactories.length; idx++) {
      FeatureFactory<IN> extraFactory =
          new MetaClass(flags.featureFactories[idx]).createInstance(flags.featureFactoriesArgs.get(idx));
      this.featureFactories.add(extraFactory);
    }
  }

  if (flags.tokenFactory != null) {
    this.tokenFactory = new MetaClass(flags.tokenFactory).createInstance(flags.tokenFactoryArgs);
  } else {
    // No token factory configured: fall back to CoreLabelTokenFactory.
    this.tokenFactory = (CoreTokenFactory<IN>) new CoreLabelTokenFactory();
  }

  this.pad = this.tokenFactory.makeToken();
  this.windowSize = flags.maxLeft + 1;
  reinit();
}
// Start with a freshly made token from the factory as "previous" and an empty
// StringBuilder "prepend". NOTE(review): how the two are consumed is outside this view.
IN previous = tokenFactory.makeToken(); StringBuilder prepend = new StringBuilder();
// Make the pad token from the token factory, set the window size to
// flags.maxLeft + 1, then call reinit().
pad = tokenFactory.makeToken(); windowSize = flags.maxLeft + 1; reinit();