/**
 * Checks whether the value of a feature is directly usable as a number.
 *
 * @param f
 *            the feature whose value is inspected
 * @return {@code true} only if the value is a {@link Number} instance; {@code false} for the
 *         feature named {@code Constants.ID_FEATURE_NAME} and for a value that could be cast
 *         to {@link String} and parsed as a double
 * @throws IllegalArgumentException
 *             if the value is not a {@link Number}, the feature is not the id feature, and
 *             casting the value to {@link String} or parsing it as a double fails
 */
private boolean sanityCheckValue(Feature f) {
    if (f.getValue() instanceof Number) {
        return true;
    }

    boolean isIdFeature = f.getName().equals(Constants.ID_FEATURE_NAME);
    if (!isIdFeature) {
        try {
            // The parsed number is discarded; the call only validates the text.
            Double.parseDouble((String) f.getValue());
        } catch (Exception e) {
            String message = "Feature [" + f.getName() + "] has a non-numeric value ["
                    + f.getValue() + "]";
            throw new IllegalArgumentException(message, e);
        }
    }

    // NOTE(review): a string that parses successfully still yields false, exactly like the
    // id feature - confirm against the callers that this is intended.
    return false;
}
/**
 * Records feature meta data taken from the first instance of the given list.
 * <p>
 * The set of feature names is replaced by a fresh sorted set. For every feature of the first
 * instance its name is added to that set, its type is registered in {@code featDesc} unless
 * the name is already present there, and for {@code NOMINAL} features the class name of the
 * value is stored in {@code enumFeatureName}. Finally {@code didCollect} is set.
 *
 * @param instances
 *            the instances to inspect; only the element at index 0 is read
 */
public void collectMetaData(List<Instance> instances) {
    featureNames = new TreeSet<>();

    Instance firstInstance = instances.get(0);
    for (Feature feature : firstInstance.getFeatures()) {
        featureNames.add(feature.getName());

        boolean typeAlreadyKnown = featDesc.containsKey(feature.getName());
        if (!typeAlreadyKnown) {
            featDesc.put(feature.getName(), feature.getType());
        }

        if (feature.getType() == FeatureType.NOMINAL) {
            // Remember which class backs the nominal value of this feature.
            String valueClassName = feature.getValue().getClass().getName();
            enumFeatureName.put(feature.getName(), valueClassName);
        }
    }

    didCollect = true;
}
/**
 * Produces a single feature telling whether the covered text of the target contains a
 * period character.
 *
 * @param aView
 *            the view being processed (not read by this method)
 * @param aTarget
 *            the target whose covered text is examined
 * @return a new set holding one feature named {@code FEATURE_NAME} with value 1 if the
 *         covered text contains ".", otherwise 0
 * @throws TextClassificationException
 *             declared by the signature
 */
public Set<Feature> extract(JCas aView, TextClassificationTarget aTarget)
        throws TextClassificationException {
    Set<Feature> result = new HashSet<Feature>();
    int containsPeriod = aTarget.getCoveredText().contains(".") ? 1 : 0;
    result.add(new Feature(FEATURE_NAME, containsPeriod));
    return result;
}
/**
 * Returns the textual value of a feature, replacing {@code STRING} and {@code NOMINAL} values
 * by an id string.
 * <p>
 * Ids are looked up in {@code stringToIntegerMap}. A value that has no id yet receives the
 * current {@code maxStringId}, which is then incremented.
 *
 * @param feature
 *            the feature to convert
 * @return the mapped id for {@code STRING}/{@code NOMINAL} features, otherwise the
 *         {@code toString()} of the feature value
 */
private String getValue(Feature feature) {
    // Every other feature type is passed through unchanged.
    if (!feature.getType().equals(FeatureType.STRING)
            && !feature.getType().equals(FeatureType.NOMINAL)) {
        return feature.getValue().toString();
    }

    String key = feature.getValue().toString();
    String mappedId = stringToIntegerMap.get(key);
    if (mappedId == null) {
        // First occurrence of this value: hand out the next free id.
        mappedId = String.valueOf(maxStringId++);
        stringToIntegerMap.put(key, mappedId);
    }
    return mappedId;
}
/**
 * Passes the context of the given unit to {@code ContextMetaCollectorUtil.addContext} using
 * the writer {@code bw}, and closes that writer when the document meta data marks the
 * current document as the last segment.
 *
 * @param jcas
 *            the CAS being processed
 * @param unit
 *            the classification target whose context is recorded
 * @return always a new, empty set
 * @throws TextClassificationException
 *             wrapping any {@link IOException} raised in the process
 */
@Override
public Set<Feature> extract(JCas jcas, TextClassificationTarget unit)
        throws TextClassificationException {
    try {
        String instanceId = (String) InstanceIdFeature.retrieve(jcas, unit).getValue();
        ContextMetaCollectorUtil.addContext(jcas, unit, instanceId, bw);

        boolean lastSegment = DocumentMetaData.get(jcas).getIsLastSegment();
        if (lastSegment) {
            bw.close();
        }
    } catch (IOException e) {
        throw new TextClassificationException(e);
    }

    return new HashSet<Feature>();
}
/**
 * Runs the given feature extractor on the target and keeps only those features that do not
 * report a default value.
 *
 * @param aJCas
 *            the CAS handed to the extractor
 * @param aTarget
 *            the target handed to the extractor
 * @param aFeatExtractor
 *            the extractor resource; it is cast to {@code FeatureExtractor}
 * @return a new set with every extracted feature for which {@code isDefaultValue()} is false
 * @throws TextClassificationException
 *             if the extractor raises it
 */
private Set<Feature> getSparse(JCas aJCas, TextClassificationTarget aTarget,
        FeatureExtractorResource_ImplBase aFeatExtractor) throws TextClassificationException {
    FeatureExtractor extractor = (FeatureExtractor) aFeatExtractor;
    Set<Feature> extracted = extractor.extract(aJCas, aTarget);

    Set<Feature> nonDefault = new HashSet<>();
    for (Feature candidate : extracted) {
        if (candidate.isDefaultValue()) {
            continue;
        }
        nonDefault.add(candidate);
    }
    return nonDefault;
}
/**
 * Produces a single feature telling whether the covered text of the target contains a
 * hyphen character.
 *
 * @param aView
 *            the view being processed (not read by this method)
 * @param aTarget
 *            the target whose covered text is examined
 * @return a new set holding one feature named {@code FEATURE_NAME} with value 1 if the
 *         covered text contains "-", otherwise 0
 * @throws TextClassificationException
 *             declared by the signature
 */
public Set<Feature> extract(JCas aView, TextClassificationTarget aTarget)
        throws TextClassificationException {
    int hyphenFlag;
    if (aTarget.getCoveredText().contains("-")) {
        hyphenFlag = 1;
    } else {
        hyphenFlag = 0;
    }

    Set<Feature> result = new HashSet<Feature>();
    result.add(new Feature(FEATURE_NAME, hyphenFlag));
    return result;
}
}
/**
 * Walks over every {@code TextClassificationSequence} selected from the CAS and, for each
 * {@code TextClassificationTarget} covered by it, hands the target's context to
 * {@code ContextMetaCollectorUtil.addContext} together with the instance id retrieved for
 * that target and the id of the sequence.
 *
 * @param jcas
 *            the CAS to process
 * @throws AnalysisEngineProcessException
 *             wrapping any exception raised while retrieving the id or adding the context
 */
@Override
public void process(JCas jcas) throws AnalysisEngineProcessException {
    for (TextClassificationSequence sequence : JCasUtil.select(jcas,
            TextClassificationSequence.class)) {
        int sequenceId = sequence.getId();

        for (TextClassificationTarget target : JCasUtil.selectCovered(jcas,
                TextClassificationTarget.class, sequence)) {
            try {
                String instanceId = (String) InstanceIdFeature
                        .retrieve(jcas, target, sequenceId).getValue();
                ContextMetaCollectorUtil.addContext(jcas, target, instanceId, bw);
            } catch (Exception e) {
                throw new AnalysisEngineProcessException(e);
            }
        }
    }
}
}
/**
 * Extracts features for the target with the given extractor and drops every feature whose
 * {@code isDefaultValue()} returns true.
 *
 * @param aJCas
 *            the CAS handed to the extractor
 * @param aTarget
 *            the target handed to the extractor
 * @param aFeatExtractor
 *            the extractor resource; it is cast to {@code FeatureExtractor}
 * @return a new set containing only the features with a non-default value
 * @throws TextClassificationException
 *             if the extractor raises it
 */
private Set<Feature> getSparse(JCas aJCas, TextClassificationTarget aTarget,
        FeatureExtractorResource_ImplBase aFeatExtractor) throws TextClassificationException {
    Set<Feature> sparse = new HashSet<>();

    ((FeatureExtractor) aFeatExtractor).extract(aJCas, aTarget).forEach(feature -> {
        if (!feature.isDefaultValue()) {
            sparse.add(feature);
        }
    });

    return sparse;
}
/**
 * Reports whether the value of a feature is a {@link Number}, validating string values along
 * the way.
 * <p>
 * A value that is not a {@link Number} is cast to {@link String} and parsed as a double,
 * unless the feature is named {@code Constants.ID_FEATURE_NAME}. A failing cast or parse is
 * reported as an {@link IllegalArgumentException}; a successful parse does not change the
 * result.
 *
 * @param f
 *            the feature whose value is inspected
 * @return {@code true} if the value is a {@link Number} instance, otherwise {@code false}
 * @throws IllegalArgumentException
 *             if a non-id feature carries a value that is neither a {@link Number} nor a
 *             string parsable as a double
 */
protected boolean sanityCheckValue(Feature f) {
    boolean numeric = f.getValue() instanceof Number;

    if (!numeric && !f.getName().equals(Constants.ID_FEATURE_NAME)) {
        try {
            Double.valueOf((String) f.getValue());
        } catch (Exception e) {
            throw new IllegalArgumentException("Feature [" + f.getName()
                    + "] has a non-numeric value [" + f.getValue() + "]", e);
        }
    }

    // NOTE(review): parsable strings end up as false here, same as the id feature - confirm
    // with the callers that this is the intended outcome.
    return numeric;
}
/**
 * Emits one {@code NOMINAL} feature describing a length category derived from the number of
 * {@code Token} annotations selected from the CAS.
 * <p>
 * More than 150 tokens yields {@code LengthEnum.LONG}, more than 100 yields
 * {@code LengthEnum.MIDDLE}, anything else {@code LengthEnum.SHORT}.
 *
 * @param jcas
 *            the CAS the tokens are selected from
 * @param classificationUnit
 *            the classification target (not read by this method)
 * @return the result of {@code asSet()} on the feature named {@code FEATURE_NAME}
 * @throws TextClassificationException
 *             declared by the signature
 */
@Override
public Set<Feature> extract(JCas jcas, TextClassificationTarget classificationUnit)
        throws TextClassificationException {
    int tokenCount = JCasUtil.select(jcas, Token.class).size();

    LengthEnum category;
    if (tokenCount > 150) {
        category = LengthEnum.LONG;
    } else if (tokenCount > 100) {
        category = LengthEnum.MIDDLE;
    } else {
        category = LengthEnum.SHORT;
    }

    return new Feature(FEATURE_NAME, category, FeatureType.NOMINAL).asSet();
}
}
/**
 * Collects feature meta data from the instance at index 0 of the given list.
 * <p>
 * {@code featureNames} is reset to an empty sorted set and filled with the names of that
 * instance's features. Feature types are added to {@code featDesc} for names not yet
 * contained in it, and for {@code NOMINAL} features the class name of the value is written
 * to {@code enumFeatureName}. {@code didCollect} is set to true at the end.
 *
 * @param instances
 *            the instances to inspect; only the first one is read
 */
public void collectMetaData(List<Instance> instances) {
    featureNames = new TreeSet<>();

    instances.get(0).getFeatures().forEach(feature -> {
        featureNames.add(feature.getName());

        if (!featDesc.containsKey(feature.getName())) {
            featDesc.put(feature.getName(), feature.getType());
        }

        boolean nominal = feature.getType() == FeatureType.NOMINAL;
        if (nominal) {
            enumFeatureName.put(feature.getName(), feature.getValue().getClass().getName());
        }
    });

    didCollect = true;
}
/**
 * Produces a single feature telling whether the covered text of the target contains an
 * underscore character.
 *
 * @param aView
 *            the view being processed (not read by this method)
 * @param aTarget
 *            the target whose covered text is examined
 * @return a new set holding one feature named {@code FEATURE_NAME} with value 1 if the
 *         covered text contains "_", otherwise 0
 * @throws TextClassificationException
 *             declared by the signature
 */
public Set<Feature> extract(JCas aView, TextClassificationTarget aTarget)
        throws TextClassificationException {
    boolean hasUnderscore = aTarget.getCoveredText().contains("_");

    Set<Feature> result = new HashSet<Feature>();
    result.add(new Feature(FEATURE_NAME, hasUnderscore ? 1 : 0));
    return result;
}
/**
 * When {@code isTesting} is set, strips from every instance the features whose names are not
 * contained in {@code featureMeta.getFeatureNames()}.
 * <p>
 * The instances are modified in place through {@code setFeatures} and returned in a new
 * list in their original order. When {@code isTesting} is not set, the input list is
 * returned untouched.
 *
 * @param instances
 *            the instances to filter
 * @return the input list itself outside of testing, otherwise a new list holding the same
 *         instances with their reduced feature lists
 */
private List<Instance> enforceMatchingFeatures(List<Instance> instances) {
    if (!isTesting) {
        return instances;
    }

    List<Instance> filteredInstances = new ArrayList<>();
    for (Instance instance : instances) {
        List<Feature> retained = new ArrayList<>();
        for (Feature candidate : instance.getFeatures()) {
            boolean knownName = featureMeta.getFeatureNames().contains(candidate.getName());
            if (knownName) {
                retained.add(candidate);
            }
        }
        instance.setFeatures(retained);
        filteredInstances.add(instance);
    }
    return filteredInstances;
}
/**
 * Records the context of every {@code TextClassificationTarget} that is covered by a
 * {@code TextClassificationSequence} selected from the CAS.
 *
 * @param jcas
 *            the CAS to process
 * @throws AnalysisEngineProcessException
 *             wrapping any exception raised while retrieving an instance id or adding a
 *             context
 */
@Override
public void process(JCas jcas) throws AnalysisEngineProcessException {
    Collection<TextClassificationSequence> allSequences = JCasUtil.select(jcas,
            TextClassificationSequence.class);

    for (TextClassificationSequence sequence : allSequences) {
        int sequenceId = sequence.getId();
        for (TextClassificationTarget target : JCasUtil.selectCovered(jcas,
                TextClassificationTarget.class, sequence)) {
            writeTargetContext(jcas, target, sequenceId);
        }
    }
}

/**
 * Retrieves the instance id for one target and passes the target's context, that id and the
 * writer {@code bw} to {@code ContextMetaCollectorUtil.addContext}.
 *
 * @param jcas
 *            the CAS the target belongs to
 * @param target
 *            the target whose context is added
 * @param sequenceId
 *            the id of the sequence covering the target
 * @throws AnalysisEngineProcessException
 *             wrapping any exception raised by the retrieval or by adding the context
 */
private void writeTargetContext(JCas jcas, TextClassificationTarget target, int sequenceId)
        throws AnalysisEngineProcessException {
    try {
        String instanceId = (String) InstanceIdFeature.retrieve(jcas, target, sequenceId)
                .getValue();
        ContextMetaCollectorUtil.addContext(jcas, target, instanceId, bw);
    } catch (Exception e) {
        throw new AnalysisEngineProcessException(e);
    }
}
}
Set<Feature> features = ((FeatureExtractor) featExt).extract(aJCas, aTarget); features.forEach(x -> { if (!x.isDefaultValue()) { instance.addFeature(x);