@Override public Instance<OUTCOME_T> transform(Instance<OUTCOME_T> instance) { List<Feature> features = new ArrayList<Feature>(); for (Feature feature : instance.getFeatures()) { if (this.isTransformable(feature)) { // Filter down to selected features features.addAll(Collections2.filter(((TransformableFeature) feature).getFeatures(), this)); } else { // Pass non-relevant features through w/o filtering features.add(feature); } } return new Instance<OUTCOME_T>(instance.getOutcome(), features); }
/**
 * Creates an instance with the given outcome and then adds every supplied feature to it.
 *
 * @param outcome
 *          the outcome passed on to the single-argument constructor
 * @param features
 *          the features handed to {@code addAll}
 */
public Instance(OUTCOME_TYPE outcome, Collection<Feature> features) {
  this(outcome);
  this.addAll(features);
}
@Override public void setClassLabel( IdentifiedAnnotation entityOrEventMention, Instance<String> instance ) throws AnalysisEngineProcessException { if ( this.isTraining() ) { int history = entityOrEventMention.getHistoryOf(); // downsampling. initialize probabilityOfKeepingADefaultExample to 1.0 for no downsampling if ( history == CONST.NE_HISTORY_OF_ABSENT && coin.nextDouble() >= this.probabilityOfKeepingADefaultExample ) { return; } instance.setOutcome( String.valueOf( history ) ); } else { String label = this.classifier.classify( instance.getFeatures() ); entityOrEventMention.setHistoryOf( Integer.parseInt( label ) ); } }
/**
 * Writes a sequence of instances through the delegated data writer.
 *
 * Before each instance is written, every outcome feature extractor is run on the outcomes of
 * the instances written so far, and its features are added to the list returned by that
 * instance's {@code getFeatures()}.
 *
 * @param instances
 *          the instances to write, in order
 * @throws CleartkProcessingException
 *           declared for the delegated write
 * @throws IllegalStateException
 *           if no delegated data writer has been set
 */
public void write(List<Instance<OUTCOME_TYPE>> instances) throws CleartkProcessingException {
  if (this.delegatedDataWriter == null) {
    throw new IllegalStateException(
        "delegatedDataWriter must be set before calling writeSequence");
  }

  List<Object> previousOutcomes = new ArrayList<Object>();
  for (Instance<OUTCOME_TYPE> current : instances) {
    List<Feature> currentFeatures = current.getFeatures();
    for (OutcomeFeatureExtractor extractor : outcomeFeatureExtractors) {
      // Extractors only see outcomes of earlier instances; the current one is appended below
      currentFeatures.addAll(extractor.extractFeatures(previousOutcomes));
    }
    previousOutcomes.add(current.getOutcome());
    delegatedDataWriter.write(current);
  }
}
/**
 * Builds one instance from an outcome and a flat list of alternating feature names and values.
 *
 * @param outcome
 *          the outcome given to the new instance
 * @param featureData
 *          name/value pairs; element {@code 2k} is converted with {@code toString()} to form
 *          the feature name and element {@code 2k + 1} is used as the feature value
 * @return an instance holding the outcome and one feature per name/value pair
 * @throws IllegalArgumentException
 *           if {@code featureData} has an odd number of elements
 */
public static <T> Instance<T> createInstance(T outcome, Object... featureData) {
  boolean hasUnpairedElement = featureData.length % 2 != 0;
  if (hasUnpairedElement) {
    throw new IllegalArgumentException(
        "feature data must consist of an even number of elements corresponding to name/value pairs used to create features. ");
  }

  Instance<T> result = new Instance<T>(outcome);
  for (int i = 0; i < featureData.length; i += 2) {
    String name = featureData[i].toString();
    Object value = featureData[i + 1];
    result.add(new Feature(name, value));
  }
  return result;
}
// Assembles the features of one String-outcome instance, in this order:
//   1. a "Domain" feature built from domainFeature;
//   2. whatever extractor.extractWithin(...) returns for the annotation within coveringSent;
//   3. three "ClosestCue_*" features (covered text, phrase assertion family, phrase category);
//   4. the results of ffDomainAdaptor.apply(...) on fresh copies of those three features;
//   5. a value-less "ENTITY_TYPE_ANAT_SITE" feature and the result of applying ffDomainAdaptor to it;
//   6. the features from extractor.extract(jCas, identifiedAnnotation).
// The resulting feature list is then read back into `feats`.
// NOTE(review): extractor.extract(jCas, identifiedAnnotation) is added twice in a row. This
// excerpt may join statements from separate code paths, so confirm against the full source
// whether the duplicate is intentional.
Instance<String> instance = new Instance<>(); instance.add( new Feature( "Domain", domainFeature ) ); instance.addAll( extractor .extractWithin( annotationView, identifiedAnnotation, coveringSent ) ); instance.add( new Feature( "ClosestCue_Word", closestCue.getCoveredText() ) ); instance.add( new Feature( "ClosestCue_PhraseFamily", closestCue.getCuePhraseAssertionFamily() ) ); instance.add( new Feature( "ClosestCue_PhraseCategory", closestCue.getCuePhraseCategory() ) ); instance.addAll( ffDomainAdaptor .apply( new Feature( "ClosestCue_Word", closestCue.getCoveredText() ) ) ); instance.addAll( ffDomainAdaptor .apply( new Feature( "ClosestCue_PhraseFamily", closestCue .getCuePhraseAssertionFamily() ) ) ); instance.addAll( ffDomainAdaptor .apply( new Feature( "ClosestCue_PhraseCategory", closestCue.getCuePhraseCategory() ) ) ); instance.add( new Feature( "ENTITY_TYPE_ANAT_SITE" ) ); instance.addAll( ffDomainAdaptor.apply( new Feature( "ENTITY_TYPE_ANAT_SITE" ) ) ); instance.addAll( extractor.extract( jCas, identifiedAnnotation ) ); instance.addAll( extractor.extract( jCas, identifiedAnnotation ) ); List<Feature> feats = instance.getFeatures();
public void process(JCas jCas) throws AnalysisEngineProcessException { DocumentAnnotation doc = (DocumentAnnotation) jCas.getDocumentAnnotationFs(); Instance<String> instance = new Instance<String>(); instance.addAll(this.extractor.extract(jCas, doc)); if (isTraining()) { UsenetDocument document = JCasUtil.selectSingle(jCas, UsenetDocument.class); instance.setOutcome(document.getCategory()); this.dataWriter.write(instance); } else { // This is classification, so classify and create UsenetDocument annotation String result = this.classifier.classify(instance.getFeatures()); UsenetDocument document = new UsenetDocument(jCas, 0, jCas.getDocumentText().length()); document.setCategory(result); document.addToIndexes(); // System.out.println("classified " + ViewURIUtil.getURI(jCas) + " as " + result + "."); } }
/**
 * Pairs each outcome with the feature list at the same index and wraps each pair in an
 * Instance.
 *
 * The two lists must have the same size.
 *
 * @param outcomes
 *          The list of classifier outcomes.
 * @param featureLists
 *          The list of classifier feature-lists.
 * @return A list of Instances produced by matching the outcomes and feature-lists pairwise.
 * @throws IllegalArgumentException
 *           if the two lists differ in size
 */
public static <OUTCOME_TYPE> List<Instance<OUTCOME_TYPE>> toInstances(
    List<OUTCOME_TYPE> outcomes,
    List<List<Feature>> featureLists) {
  int nOutcomes = outcomes.size();
  int nFeatureLists = featureLists.size();
  if (nOutcomes != nFeatureLists) {
    throw new IllegalArgumentException(String.format(
        "expected the same number of outcomes (%d) as featureLists (%d)",
        nOutcomes,
        nFeatureLists));
  }

  List<Instance<OUTCOME_TYPE>> paired = new ArrayList<Instance<OUTCOME_TYPE>>(nOutcomes);
  for (int index = 0; index < nOutcomes; index++) {
    OUTCOME_TYPE outcome = outcomes.get(index);
    List<Feature> features = featureLists.get(index);
    paired.add(new Instance<OUTCOME_TYPE>(outcome, features));
  }
  return paired;
}
// Replaces `attribute` with the default value, then builds an instance from `features` with
// that value as its outcome and writes it through the data writer.
// NOTE(review): the statements before this excerpt are not visible. Presumably the default is
// used only when no attribute value was found; confirm against the full source.
attribute = this.getDefaultValue(); Instance<OUTCOME_TYPE> instance = new Instance<OUTCOME_TYPE>(); instance.addAll(features); instance.setOutcome(attribute); this.dataWriter.write(instance);
@Override public void setClassLabel( IdentifiedAnnotation entityOrEventMention, Instance<String> instance ) throws AnalysisEngineProcessException { if ( this.isTraining() ) { String subj = entityOrEventMention.getSubject(); // downsampling. initialize probabilityOfKeepingADefaultExample to 1.0 for no downsampling if ( "patient".equals( subj ) && coin.nextDouble() >= this.probabilityOfKeepingADefaultExample ) { return; } instance.setOutcome( subj ); logger.log( Level.DEBUG, String.format( "[%s] expected: ''; actual: ''; features: %s", this.getClass().getSimpleName(), instance.toString() ) ); } else { String label = this.classifier.classify( instance.getFeatures() ); entityOrEventMention.setSubject( label ); logger.log( Level.DEBUG, "SUBJECT is being set on an IdentifiedAnnotation: " + label + " " + entityOrEventMention.getSubject() ); } }
/**
 * Builds one Boolean instance per sentence and either writes it (training) or scores it and
 * annotates sentences that score positively (classification).
 *
 * Every instance is created with the outcome {@code false}. In classification mode, a
 * {@code SummarySentence} covering the sentence is added to the indexes when the classifier's
 * score for the {@code true} outcome is greater than zero.
 *
 * @param jcas
 *          the CAS whose sentences are processed
 * @throws AnalysisEngineProcessException
 *           declared for the extraction, write and score calls
 */
@Override
public void process(JCas jcas) throws AnalysisEngineProcessException {
  for (Sentence sentence : JCasUtil.select(jcas, Sentence.class)) {
    Instance<Boolean> instance = new Instance<Boolean>(
        false,
        this.extractor.extract(jcas, sentence));

    if (this.isTraining()) {
      this.dataWriter.write(instance);
    } else {
      Map<Boolean, Double> scoredOutcomes = this.classifier.score(instance.getFeatures());
      Double trueScore = scoredOutcomes.get(true);
      // Map.get returns null when the classifier produced no score for `true`; unboxing that
      // in the comparison would throw a NullPointerException, so treat it as "not selected".
      if (trueScore != null && trueScore > 0.0) {
        SummarySentence extractedSentence = new SummarySentence(
            jcas,
            sentence.getBegin(),
            sentence.getEnd());
        extractedSentence.setScore(trueScore);
        extractedSentence.addToIndexes();
      }
    }
  }
}
/**
 * Classifies a sequence of instances by handing their feature lists, in order, to the
 * underlying classifier.
 *
 * @param instances
 *          the instances whose features are classified
 * @return the outcomes returned by the classifier for the collected feature lists
 * @throws CleartkProcessingException
 *           declared for the classifier call
 */
protected List<OUTCOME_TYPE> classify(List<Instance<OUTCOME_TYPE>> instances)
    throws CleartkProcessingException {
  List<List<Feature>> featureLists = new ArrayList<List<Feature>>(instances.size());
  for (Instance<OUTCOME_TYPE> current : instances) {
    List<Feature> currentFeatures = current.getFeatures();
    featureLists.add(currentFeatures);
  }
  return this.classifier.classify(featureLists);
}
/**
 * Processes every {@code Markable} in the CAS: gathers features from all extractors, then
 * either writes a training instance or updates the markable's confidence.
 *
 * In training mode the outcome is whether the markable's current confidence exceeds 0.5.
 * Otherwise the classifier's score for the {@code true} outcome becomes the new confidence.
 *
 * @param jcas
 *          the CAS whose markables are processed
 * @throws AnalysisEngineProcessException
 *           declared for the extraction, write and score calls
 */
@Override
public void process(JCas jcas) throws AnalysisEngineProcessException {
  LOGGER.info( "Processing ..." );
  for (Markable markable : JCasUtil.select(jcas, Markable.class)) {
    List<Feature> features = new ArrayList<>();
    for (FeatureExtractor1<Markable> extractor : extractors) {
      features.addAll(extractor.extract(jcas, markable));
    }
    Instance<Boolean> instance = new Instance<>(features);

    if (!this.isTraining()) {
      // Classification: store the score of the positive outcome as the confidence
      Map<Boolean,Double> scores = this.classifier.score(features);
      markable.setConfidence(scores.get(true).floatValue());
      continue;
    }

    // Training: the gold label is derived from the existing confidence
    boolean isConfident = markable.getConfidence() > 0.5;
    instance.setOutcome(isConfident);
    this.dataWriter.write(instance);
  }
  LOGGER.info( "Finished." );
}
}
/**
 * Writes one line to the training data containing only the instance's outcome; the features
 * are not written.
 *
 * @param instance
 *          the instance whose outcome is printed
 * @throws CleartkProcessingException
 *           declared by the overridden method
 */
@Override
public void write(Instance<OUTCOME_TYPE> instance) throws CleartkProcessingException {
  OUTCOME_TYPE outcome = instance.getOutcome();
  this.trainingDataWriter.println(outcome);
}
// Assembles the features of one String-outcome instance, in this order:
//   1. a "Domain" feature built from domainFeature;
//   2. whatever extractor.extractWithin(...) returns for the annotation within coveringSent;
//   3. three "ClosestCue_*" features (covered text, phrase assertion family, phrase category);
//   4. the results of ffDomainAdaptor.apply(...) on fresh copies of those three features;
//   5. a value-less "ENTITY_TYPE_ANAT_SITE" feature and the result of applying ffDomainAdaptor to it;
//   6. the features from extractor.extract(jCas, identifiedAnnotation).
// The resulting feature list is then read back into `feats`.
// NOTE(review): extractor.extract(jCas, identifiedAnnotation) is added twice in a row. This
// excerpt may join statements from separate code paths, so confirm against the full source
// whether the duplicate is intentional.
Instance<String> instance = new Instance<>(); instance.add( new Feature( "Domain", domainFeature ) ); instance.addAll( extractor .extractWithin( annotationView, identifiedAnnotation, coveringSent ) ); instance.add( new Feature( "ClosestCue_Word", closestCue.getCoveredText() ) ); instance.add( new Feature( "ClosestCue_PhraseFamily", closestCue.getCuePhraseAssertionFamily() ) ); instance.add( new Feature( "ClosestCue_PhraseCategory", closestCue.getCuePhraseCategory() ) ); instance.addAll( ffDomainAdaptor .apply( new Feature( "ClosestCue_Word", closestCue.getCoveredText() ) ) ); instance.addAll( ffDomainAdaptor .apply( new Feature( "ClosestCue_PhraseFamily", closestCue .getCuePhraseAssertionFamily() ) ) ); instance.addAll( ffDomainAdaptor .apply( new Feature( "ClosestCue_PhraseCategory", closestCue.getCuePhraseCategory() ) ) ); instance.add( new Feature( "ENTITY_TYPE_ANAT_SITE" ) ); instance.addAll( ffDomainAdaptor.apply( new Feature( "ENTITY_TYPE_ANAT_SITE" ) ) ); instance.addAll( extractor.extract( jCas, identifiedAnnotation ) ); instance.addAll( extractor.extract( jCas, identifiedAnnotation ) ); List<Feature> feats = instance.getFeatures();
/**
 * Encodes one instance and appends it to the training data as a line of the form
 * {@code "<outcome> <features>"}.
 *
 * The outcome and features are encoded with the classifier builder's encoders, and the encoded
 * features are rendered with {@code toMaxentString()}.
 *
 * @param instance
 *          the instance to write; it must have a non-null outcome
 * @throws CleartkProcessingException
 *           if the instance has no outcome
 */
@Override
public void write(Instance<OUTCOME_TYPE> instance) throws CleartkProcessingException {
  OUTCOME_TYPE rawOutcome = instance.getOutcome();
  if (rawOutcome == null) {
    throw CleartkProcessingException.noInstanceOutcome(instance.getFeatures());
  }

  String encodedOutcome = this.classifierBuilder.getOutcomeEncoder().encode(rawOutcome);
  ContextValues encodedFeatures =
      this.classifierBuilder.getFeaturesEncoder().encodeAll(instance.getFeatures());
  String featureText = encodedFeatures.toMaxentString();
  this.trainingDataWriter.printf("%s %s\n", encodedOutcome, featureText);
}
}
// Builds a String-outcome instance for a candidate temporal link: each source feature extractor
// contributes features for sourceToken, targetToken and windowAnnotation, and pathExtractor
// contributes features for the link's source/target pair. An existing TemporalLink removed from
// `tlinks` supplies the outcome for the data writer; the classifier result is used to populate
// a newly created TemporalLink.
// NOTE(review): this excerpt appears to omit lines from the original method. The braces opened
// here are never closed and `tlink` is declared twice, so the training and classification
// statements presumably sit in separate branches. Confirm against the full source before
// editing.
Instance<String> instance = new Instance<String>(); for (FeatureExtractor1<Token> extractor : this.sourceFeatureExtractors) { instance.addAll(extractor.extract(jCas, sourceToken)); instance.addAll(extractor.extract(jCas, targetToken)); instance.addAll(extractor.extract(jCas, windowAnnotation)); instance.addAll(this.pathExtractor.extract(jCas, link.source, link.target)); TemporalLink tlink = tlinks.remove(key); if (tlink != null) { instance.setOutcome(tlink.getRelationType()); this.dataWriter.write(instance); String relationType = this.classifier.classify(instance.getFeatures()); TemporalLink tlink = new TemporalLink(jCas, docEnd, docEnd); tlink.setSource(source);
/**
 * Processes every {@code Time} annotation in the CAS: gathers features from all extractors,
 * then either writes a training instance labelled with the annotation's time type or sets the
 * time type to the classifier's prediction.
 *
 * @param jCas
 *          the CAS whose time annotations are processed
 * @throws AnalysisEngineProcessException
 *           declared for the extraction, write and classify calls
 */
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
  for (Time time : JCasUtil.select(jCas, Time.class)) {
    List<Feature> collected = new ArrayList<Feature>();
    for (FeatureExtractor1<Time> extractor : this.featuresExtractors) {
      collected.addAll(extractor.extract(jCas, time));
    }

    if (!this.isTraining()) {
      // Classification: overwrite the time type with the predicted one
      String predictedType = this.classifier.classify(collected);
      time.setTimeType(predictedType);
      continue;
    }

    // Training: the existing time type is the gold label
    Instance<String> example = new Instance<String>(time.getTimeType(), collected);
    this.dataWriter.write(example);
  }
}
// Takes the next token from the iterator, extracts its features in the context of `sentence`,
// wraps them in a String-outcome instance labelled with getTag(jCas, token), and appends that
// instance to `instances`.
// NOTE(review): the enclosing loop and the declarations of `tokens`, `sentence` and `instances`
// are outside this excerpt.
TOKEN_TYPE token = (TOKEN_TYPE) tokens.next(); List<Feature> features = featureExtractor.extractFeatures(jCas, token, sentence); Instance<String> instance = new Instance<String>(); instance.addAll(features); instance.setOutcome(getTag(jCas, token)); instances.add(instance);
/**
 * In training mode, copies the mention's subject onto the instance as its outcome. Otherwise,
 * classifies the instance's features and stores the predicted subject on the mention.
 *
 * During training, an example whose subject is "patient" is left without an outcome when the
 * random draw is at or above {@code probabilityOfKeepingADefaultExample}. A probability of 1.0
 * disables this downsampling. Both branches emit a DEBUG log entry.
 *
 * @param entityOrEventMention
 *          the annotation read from (training) or written to (classification)
 * @param instance
 *          the instance that receives the outcome or supplies the features
 * @throws AnalysisEngineProcessException
 *           declared by the overridden method
 */
@Override
public void setClassLabel(IdentifiedAnnotation entityOrEventMention, Instance<String> instance)
    throws AnalysisEngineProcessException {
  if (!this.isTraining()) {
    String predicted = this.classifier.classify(instance.getFeatures());
    entityOrEventMention.setSubject(predicted);
    // The subject is read back after being set so the log shows the stored value too
    String message = "SUBJECT is being set on an IdentifiedAnnotation: " + predicted + " "
        + entityOrEventMention.getSubject();
    logger.log(Level.DEBUG, message);
    return;
  }

  String subject = entityOrEventMention.getSubject();
  boolean isDefaultExample = "patient".equals(subject);
  // The random draw only happens for default examples (short-circuit evaluation)
  if (isDefaultExample && coin.nextDouble() >= this.probabilityOfKeepingADefaultExample) {
    return;
  }
  instance.setOutcome(subject);
  String message = String.format("[%s] expected: ''; actual: ''; features: %s",
      this.getClass().getSimpleName(),
      instance.toString());
  logger.log(Level.DEBUG, message);
}

public static FeatureSelection<String> createFeatureSelection(double threshold) {