/**
 * Classifies a sequence of instances by handing their feature lists to the wrapped classifier.
 *
 * @param instances the instances to classify
 * @return the result of {@code this.classifier.classify} for the collected feature lists
 * @throws CleartkProcessingException propagated from the wrapped classifier
 */
protected List<OUTCOME_TYPE> classify(List<Instance<OUTCOME_TYPE>> instances)
    throws CleartkProcessingException {
  // One feature list per instance, kept in the same order as the input.
  List<List<Feature>> featureLists = new ArrayList<List<Feature>>();
  for (Instance<OUTCOME_TYPE> current : instances) {
    List<Feature> currentFeatures = current.getFeatures();
    featureLists.add(currentFeatures);
  }
  return this.classifier.classify(featureLists);
}
/**
 * Searches the features of an instance, including features nested inside
 * {@link TransformableFeature}s, for the transformable features whose name equals this
 * extractor's name.
 * <p>
 * A transformable feature with a different name is not collected itself; its nested features are
 * searched instead. Features that are not transformable are ignored.
 *
 * @param instance the instance whose features are searched
 * @return the transformable features whose name matches {@code this.name}
 */
protected List<TransformableFeature> selectTransformableFeatures(Instance<OUTCOME_T> instance) {
  List<TransformableFeature> matches = new ArrayList<TransformableFeature>();

  // Explicit work stack instead of recursion; entries are taken from the top (last added first).
  Stack<Feature> pending = new Stack<Feature>();
  pending.addAll(instance.getFeatures());

  while (!pending.isEmpty()) {
    Feature candidate = pending.pop();
    if (!(candidate instanceof TransformableFeature)) {
      continue;
    }

    TransformableFeature transformable = (TransformableFeature) candidate;
    boolean isOurs = transformable.getName().equals(this.name);
    if (isOurs) {
      matches.add(transformable);
    } else {
      // Belongs to someone else: keep looking inside it.
      pending.addAll(transformable.getFeatures());
    }
  }
  return matches;
}
/**
 * Writes a sequence of instances through the delegated data writer. Before each instance is
 * written, every outcome feature extractor is run on the outcomes of the instances that precede
 * it in the sequence, and the resulting features are added to that instance's feature list.
 *
 * @param instances the instances to write, in sequence order
 * @throws CleartkProcessingException propagated from the delegated data writer
 * @throws IllegalStateException if {@code delegatedDataWriter} has not been set
 */
public void write(List<Instance<OUTCOME_TYPE>> instances) throws CleartkProcessingException {
  if (this.delegatedDataWriter == null) {
    throw new IllegalStateException("delegatedDataWriter must be set before calling write");
  }

  // Outcomes of the instances already written; grows by one per iteration.
  List<Object> outcomes = new ArrayList<Object>();
  for (Instance<OUTCOME_TYPE> instance : instances) {
    // The list returned by getFeatures() is modified in place.
    // NOTE(review): presumably this is the instance's own list, so the delegate sees the
    // added features - confirm against Instance.getFeatures().
    List<Feature> instanceFeatures = instance.getFeatures();
    for (OutcomeFeatureExtractor outcomeFeatureExtractor : outcomeFeatureExtractors) {
      instanceFeatures.addAll(outcomeFeatureExtractor.extractFeatures(outcomes));
    }
    // The current outcome only becomes visible to the extractors for the next instance.
    outcomes.add(instance.getOutcome());
    delegatedDataWriter.write(instance);
  }
}
@Override public Instance<OUTCOME_T> transform(Instance<OUTCOME_T> instance) { List<Feature> features = new ArrayList<Feature>(); for (Feature feature : instance.getFeatures()) { if (this.isTransformable(feature)) { // Filter down to selected features features.addAll(Collections2.filter(((TransformableFeature) feature).getFeatures(), this)); } else { // Pass non-relevant features through w/o filtering features.add(feature); } } return new Instance<OUTCOME_T>(instance.getOutcome(), features); }
@Override public Instance<OUTCOME_T> transform(Instance<OUTCOME_T> instance) { List<Feature> features = new ArrayList<Feature>(); for (Feature feature : instance.getFeatures()) { if (this.isTransformable(feature)) { // Filter down to selected features features.addAll(Collections2.filter(((TransformableFeature) feature).getFeatures(), this)); } else { // Pass non-relevant features through w/o filtering features.add(feature); } } return new Instance<OUTCOME_T>(instance.getOutcome(), features); }
@Override public Instance<OUTCOME_T> transform(Instance<OUTCOME_T> instance) { List<Feature> features = new ArrayList<Feature>(); for (Feature feature : instance.getFeatures()) { if (this.isTransformable(feature)) { // Filter down to selected features features.addAll(Collections2.filter(((TransformableFeature) feature).getFeatures(), this)); } else { // Pass non-relevant features through w/o filtering features.add(feature); } } return new Instance<OUTCOME_T>(instance.getOutcome(), features); }
/**
 * Builds a new instance in which every feature nested inside a transformable feature is replaced
 * by the result of {@code this.transform(Feature)}. Features that are not transformable are
 * copied over unchanged.
 *
 * @param instance the instance to transform
 * @return a new instance with the original outcome and the transformed features
 */
@Override
public Instance<OUTCOME_T> transform(Instance<OUTCOME_T> instance) {
  List<Feature> transformed = new ArrayList<Feature>();
  for (Feature candidate : instance.getFeatures()) {
    if (!this.isTransformable(candidate)) {
      transformed.add(candidate);
      continue;
    }
    // Unpack the transformable feature and transform each nested feature individually.
    TransformableFeature container = (TransformableFeature) candidate;
    for (Feature nested : container.getFeatures()) {
      transformed.add(this.transform(nested));
    }
  }
  return new Instance<OUTCOME_T>(instance.getOutcome(), transformed);
}
@Override public void setClassLabel( IdentifiedAnnotation entityOrEventMention, Instance<String> instance ) throws AnalysisEngineProcessException { if ( this.isTraining() ) { int history = entityOrEventMention.getHistoryOf(); // downsampling. initialize probabilityOfKeepingADefaultExample to 1.0 for no downsampling if ( history == CONST.NE_HISTORY_OF_ABSENT && coin.nextDouble() >= this.probabilityOfKeepingADefaultExample ) { return; } instance.setOutcome( String.valueOf( history ) ); } else { String label = this.classifier.classify( instance.getFeatures() ); entityOrEventMention.setHistoryOf( Integer.parseInt( label ) ); } }
@Override public Instance<OUTCOME_T> transform(Instance<OUTCOME_T> instance) { List<Feature> features = new ArrayList<Feature>(); List<Feature> featuresToTransform = new ArrayList<Feature>(); for (Feature feature : instance.getFeatures()) { if (this.isTransformable(feature)) { // Store off features for later similarity computation featuresToTransform.addAll(((TransformableFeature) feature).getFeatures()); } else { // pass through non-transformable features features.add(feature); } } // Create centroid similarity feature Map<String, Double> featureMap = this.featuresToFeatureMap(featuresToTransform); features.add(new Feature(this.name, new Double(this.simFunction.distance( featureMap, centroidMap)))); return new Instance<OUTCOME_T>(instance.getOutcome(), features); }
/**
 * Encodes one instance and appends it to the training data as a line of the form
 * {@code "<outcome> <features>"}, where the features part is the maxent string of the encoded
 * context values.
 *
 * @param instance the instance to write; its outcome must not be null
 * @throws CleartkProcessingException if the instance has no outcome, or propagated from the
 *         encoders
 */
@Override
public void write(Instance<OUTCOME_TYPE> instance) throws CleartkProcessingException {
  OUTCOME_TYPE rawOutcome = instance.getOutcome();
  if (rawOutcome == null) {
    throw CleartkProcessingException.noInstanceOutcome(instance.getFeatures());
  }

  String outcome = this.classifierBuilder.getOutcomeEncoder().encode(rawOutcome);
  ContextValues contextValues =
      this.classifierBuilder.getFeaturesEncoder().encodeAll(instance.getFeatures());
  String encodedContext = contextValues.toMaxentString();
  this.trainingDataWriter.printf("%s %s\n", outcome, encodedContext);
}
}
/**
 * In training mode, sets the instance outcome to the string form of the annotation's
 * {@code getHistoryOf()} value; otherwise classifies the instance's features, parses the label
 * as an int and stores it on the annotation via {@code setHistoryOf}.
 * <p>
 * During training, an example whose value equals {@code CONST.NE_HISTORY_OF_ABSENT} is left
 * without an outcome when {@code coin.nextDouble()} is greater than or equal to
 * {@code probabilityOfKeepingADefaultExample}.
 * <p>
 * NOTE(review): this source line also carries the opening of {@code createFeatureSelection},
 * whose body is not visible here, and the {@code //} comment sits mid-line - this looks like a
 * whitespace-collapsed paste; confirm against the original file.
 *
 * @param entityOrEventMention the annotation that supplies or receives the history-of value
 * @param instance the instance that receives the outcome or supplies the features
 * @throws AnalysisEngineProcessException declared by the signature
 */
@Override public void setClassLabel(IdentifiedAnnotation entityOrEventMention, Instance<String> instance) throws AnalysisEngineProcessException { if (this.isTraining()) { int history = entityOrEventMention.getHistoryOf(); // downsampling. initialize probabilityOfKeepingADefaultExample to 1.0 for no downsampling if (history == CONST.NE_HISTORY_OF_ABSENT && coin.nextDouble() >= this.probabilityOfKeepingADefaultExample) { return; } instance.setOutcome(String.valueOf(history)); } else { String label = this.classifier.classify(instance.getFeatures()); entityOrEventMention.setHistoryOf(Integer.parseInt(label)); } } public static FeatureSelection<String> createFeatureSelection(double threshold) {
@Override public void setClassLabel( IdentifiedAnnotation entityOrEventMention, Instance<String> instance ) throws AnalysisEngineProcessException { if ( this.isTraining() ) { boolean generic = entityOrEventMention.getGeneric(); // downsampling. initialize probabilityOfKeepingADefaultExample to 1.0 for no downsampling if ( !generic && coin.nextDouble() >= this.probabilityOfKeepingADefaultExample ) { return; } instance.setOutcome( "" + generic ); } else { String label = this.classifier.classify( instance.getFeatures() ); entityOrEventMention.setGeneric( Boolean.parseBoolean( label ) ); } }
@Override public void setClassLabel( IdentifiedAnnotation entityOrEventMention, Instance<String> instance ) throws AnalysisEngineProcessException { if ( this.isTraining() ) { boolean conditional = entityOrEventMention.getConditional(); // downsampling. initialize probabilityOfKeepingADefaultExample to 1.0 for no downsampling if ( !conditional && coin.nextDouble() >= this.probabilityOfKeepingADefaultExample ) { return; } instance.setOutcome( "" + conditional ); } else { String label = this.classifier.classify( instance.getFeatures() ); boolean conditional = false; if ( label != null ) { conditional = Boolean.parseBoolean( label ); } entityOrEventMention.setConditional( conditional ); } }
/**
 * In training mode, sets the instance outcome to the string form of the annotation's
 * {@code getGeneric()} value; otherwise classifies the instance's features, parses the label
 * with {@code Boolean.parseBoolean} and stores it on the annotation via {@code setGeneric}.
 * <p>
 * During training, a non-generic example is left without an outcome when
 * {@code coin.nextDouble()} is greater than or equal to
 * {@code probabilityOfKeepingADefaultExample}.
 * <p>
 * NOTE(review): this source line also carries the opening of {@code createFeatureSelection},
 * whose body is not visible here, and the {@code //} comment sits mid-line - this looks like a
 * whitespace-collapsed paste; confirm against the original file.
 *
 * @param entityOrEventMention the annotation that supplies or receives the generic flag
 * @param instance the instance that receives the outcome or supplies the features
 * @throws AnalysisEngineProcessException declared by the signature
 */
@Override public void setClassLabel(IdentifiedAnnotation entityOrEventMention, Instance<String> instance) throws AnalysisEngineProcessException { if (this.isTraining()) { boolean generic = entityOrEventMention.getGeneric(); // downsampling. initialize probabilityOfKeepingADefaultExample to 1.0 for no downsampling if (!generic && coin.nextDouble() >= this.probabilityOfKeepingADefaultExample) { return; } instance.setOutcome(""+generic); } else { String label = this.classifier.classify(instance.getFeatures()); entityOrEventMention.setGeneric(Boolean.parseBoolean(label)); } } public static FeatureSelection<String> createFeatureSelection(double threshold) {
/**
 * Encodes the features and the outcome of a single instance with the classifier builder's
 * encoders and passes both to {@code writeEncoded}.
 *
 * @param instance the instance to write
 * @throws CleartkProcessingException propagated from encoding or writing
 */
public void write(Instance<OUTCOME_TYPE> instance) throws CleartkProcessingException {
  OUTCOME_TYPE outcome = instance.getOutcome();
  writeEncoded(
      this.classifierBuilder.getFeaturesEncoder().encodeAll(instance.getFeatures()),
      this.classifierBuilder.getOutcomeEncoder().encode(outcome));
}
/**
 * Writes a sequence of instances: each instance's features and outcome are encoded and passed to
 * {@code writeEncoded}, and {@code writeEndSequence} is called once after the last instance.
 *
 * @param instances the instances of one sequence
 * @throws CleartkProcessingException propagated from encoding or writing
 */
public void write(List<Instance<OUTCOME_TYPE>> instances) throws CleartkProcessingException {
  for (Instance<OUTCOME_TYPE> current : instances) {
    OUTCOME_TYPE outcome = current.getOutcome();
    writeEncoded(
        this.classifierBuilder.getFeaturesEncoder().encodeAll(current.getFeatures()),
        this.classifierBuilder.getOutcomeEncoder().encode(outcome));
  }
  // Mark the end of the sequence once all of its instances are written.
  this.writeEndSequence();
}
/**
 * Writes a single {@code QidInstance}: its encoded qid, encoded features and encoded outcome are
 * passed to {@code writeEncoded}.
 *
 * @param instance the instance to write; must be a {@code QidInstance}
 * @throws CleartkProcessingException if the instance is not a {@code QidInstance}, or
 *         propagated from encoding or writing
 */
@Override
public void write(Instance<Double> instance) throws CleartkProcessingException {
  boolean hasQid = instance instanceof QidInstance;
  if (!hasQid) {
    throw new CleartkProcessingException("", "Unable to write non-QidInstance");
  }

  QidInstance<Double> qidInstance = (QidInstance<Double>) instance;
  String qid = qidInstance.getQid();
  Double outcome = instance.getOutcome();
  writeEncoded(
      this.getEncodedQid(qid),
      this.classifierBuilder.getFeaturesEncoder().encodeAll(instance.getFeatures()),
      this.classifierBuilder.getOutcomeEncoder().encode(outcome));
}
/**
 * Writes a sequence of instances: each instance's features and outcome are encoded and passed to
 * {@code writeEncoded}, followed by a single call to {@code writeSequenceEnd}.
 *
 * @param instances the instances of one sequence
 * @throws CleartkProcessingException propagated from encoding, or wrapping an
 *         {@link IOException} raised while writing
 */
@Override
public void write(List<Instance<OUTCOME_TYPE>> instances) throws CleartkProcessingException {
  try {
    for (Instance<OUTCOME_TYPE> current : instances) {
      OUTCOME_TYPE outcome = current.getOutcome();
      writeEncoded(
          this.classifierBuilder.getFeaturesEncoder().encodeAll(current.getFeatures()),
          this.classifierBuilder.getOutcomeEncoder().encode(outcome));
    }
    // Mark the end of the sequence once all of its instances are written.
    writeSequenceEnd();
  } catch (IOException ioFailure) {
    throw new CleartkProcessingException(ioFailure);
  }
}
/**
 * Encodes and writes one instance together with its query id. Only {@code QidInstance}s can be
 * written; anything else is rejected.
 *
 * @param instance the instance to write; must be a {@code QidInstance}
 * @throws CleartkProcessingException if the instance is not a {@code QidInstance}, or
 *         propagated from encoding or writing
 */
@Override
public void write(Instance<Double> instance) throws CleartkProcessingException {
  if (instance instanceof QidInstance) {
    String qid = ((QidInstance<Double>) instance).getQid();
    writeEncoded(
        this.getEncodedQid(qid),
        this.classifierBuilder.getFeaturesEncoder().encodeAll(instance.getFeatures()),
        this.classifierBuilder.getOutcomeEncoder().encode(instance.getOutcome()));
    return;
  }
  throw new CleartkProcessingException("", "Unable to write non-QidInstance");
}
public void process(JCas jCas) throws AnalysisEngineProcessException { DocumentAnnotation doc = (DocumentAnnotation) jCas.getDocumentAnnotationFs(); Instance<String> instance = new Instance<String>(); instance.addAll(this.extractor.extract(jCas, doc)); if (isTraining()) { UsenetDocument document = JCasUtil.selectSingle(jCas, UsenetDocument.class); instance.setOutcome(document.getCategory()); this.dataWriter.write(instance); } else { // This is classification, so classify and create UsenetDocument annotation String result = this.classifier.classify(instance.getFeatures()); UsenetDocument document = new UsenetDocument(jCas, 0, jCas.getDocumentText().length()); document.setCategory(result); document.addToIndexes(); // System.out.println("classified " + ViewURIUtil.getURI(jCas) + " as " + result + "."); } }