/**
 * Reads field 0 of the tuple as a {@link Number} and returns its integer value.
 *
 * @param args tuple whose first field is cast to {@code Number}
 * @return the {@code intValue()} of that field, boxed as an {@code Integer}
 */
private Integer getIndexFeatureIndex(TridentTuple args) {
    final Number rawIndex = (Number) args.get(0);
    return rawIndex.intValue();
}
/**
 * Builds a {@code TextInstance} from a tuple.
 * <p>
 * When {@code withLabel} is set, field 0 is used as the label and field 1 as the text;
 * otherwise field 0 is the text and the label is {@code null}. The text is split with
 * {@code extractTokens} before the instance is created.
 *
 * @param tuple the incoming tuple
 * @return a new instance holding the (possibly null) label and the extracted tokens
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
protected TextInstance<L> createInstance(TridentTuple tuple) {
    L label = null;
    int textIndex = 0;
    if (this.withLabel) {
        label = (L) tuple.get(0);
        textIndex = 1;
    }

    final String text = tuple.getString(textIndex);
    final List<String> tokens = this.extractTokens(text);
    return new TextInstance(label, tokens);
}
/**
 * Builds an {@code Instance} from a tuple of numeric fields.
 * <p>
 * When {@code withLabel} is set, field 0 is the label and every following field is read
 * as a double feature; otherwise every field is a feature and no label is passed.
 *
 * @param tuple the incoming tuple
 * @return a new instance, labelled or unlabelled depending on {@code withLabel}
 */
@SuppressWarnings("unchecked")
protected Instance<L> createInstance(TridentTuple tuple) {
    final boolean labelled = this.withLabel;
    // Index of the first feature field: the label, when present, occupies slot 0.
    final int offset = labelled ? 1 : 0;

    // Read the label before sizing the feature array, as the original did.
    L label = null;
    if (labelled) {
        label = (L) tuple.get(0);
    }

    final double[] features = new double[tuple.size() - offset];
    for (int src = offset, dst = 0; src < tuple.size(); src++, dst++) {
        features[dst] = tuple.getDouble(src);
    }

    if (labelled) {
        return new Instance<L>(label, features);
    }
    return new Instance<L>(features);
}
}
/**
 * Collects the {@code Instance} stored in field 0 of each tuple.
 *
 * @param tuples the tuples to read
 * @return a new list with one instance per tuple, in iteration order
 */
protected List<Instance<?>> extractInstances(List<TridentTuple> tuples) {
    final List<Instance<?>> collected = new ArrayList<Instance<?>>();
    for (TridentTuple current : tuples) {
        collected.add((Instance<?>) current.get(0));
    }
    return collected;
}
/**
 * Keeps a tuple only when field 0 is a non-null map that has the key {@code "Spain"}.
 *
 * @param tuple the tuple to test
 * @return {@code true} if the map in field 0 contains the key, {@code false} otherwise
 */
@Override
public boolean isKeep(TridentTuple tuple) {
    Map<String,Integer> counts = (Map<String,Integer>) tuple.get(0);
    if (counts == null) {
        return false;
    }
    return counts.keySet().contains("Spain");
}
}
/**
 * Keeps a tuple only when the {@code User} in field 0 reports the language {@code "en"}.
 *
 * @param tuple the tuple to test
 * @return {@code true} if {@code getLang()} equals {@code "en"}
 */
@Override
public boolean isKeep(TridentTuple tuple) {
    final User author = (User) tuple.get(0);
    final String language = author.getLang();
    return "en".equals(language);
}
}
/**
 * Keeps a tuple only when the {@code Content} in field 0 has content type {@code "hashtag"}.
 *
 * @param tuple the tuple to test
 * @return {@code true} if {@code getContentType()} equals {@code "hashtag"}
 */
@Override
public boolean isKeep(TridentTuple tuple) {
    final Content item = (Content) tuple.get(0);
    final String type = item.getContentType();
    return "hashtag".equals(type);
}
}
/**
 * Emits a copy of the map in field 0 with every value multiplied by 10.
 *
 * @param tuple     tuple whose field 0 is a {@code Map<String,Integer>}
 * @param collector receives a single value: the new, scaled map
 */
@Override
public void execute(TridentTuple tuple, TridentCollector collector) {
    final Map<String,Integer> source = (Map<String,Integer>) tuple.get(0);
    final Map<String,Integer> scaled = new HashMap<String, Integer>();

    for (Map.Entry<String, Integer> entry : source.entrySet()) {
        final String key = entry.getKey();
        final Integer timesTen = entry.getValue() * 10;
        scaled.put(key, timesTen);
    }

    collector.emit(new Values(scaled));
}
}
/**
 * Standardizes the instance in field 0 using the stream statistics in field 1
 * and emits the result.
 *
 * @param tuple     tuple carrying an {@code Instance} (field 0) and a {@code StreamStatistics} (field 1)
 * @param collector receives the instance returned by {@code standardize}
 */
@Override
public void execute(TridentTuple tuple, TridentCollector collector) {
    final Instance<?> original = (Instance<?>) tuple.get(0);
    final StreamStatistics stats = (StreamStatistics) tuple.get(1);

    collector.emit(new Values(this.standardize(original, stats)));
}
/**
 * Classifies the instance in field 0 of every tuple with the clusterer stored under
 * {@code clustererName}.
 * <p>
 * The returned list always has exactly one entry per input tuple, because Trident's
 * state-query contract expects the result list to line up with the input batch. When no
 * clusterer is available (the lookup returns null, an empty list, or a null entry), each
 * entry is {@code null} instead of a cluster index.
 *
 * @param state  map state holding the clusterer
 * @param tuples batch of tuples, each carrying an {@code Instance} in field 0
 * @return one cluster index (or {@code null} when no model is stored) per tuple, in order
 */
@Override
public List<Integer> batchRetrieve(MapState<Clusterer> state, List<TridentTuple> tuples) {
    List<Integer> clusterIndexes = new ArrayList<Integer>(tuples.size());

    // The stored entry itself may be null when nothing has been saved under the key yet.
    List<Clusterer> clusterers = state.multiGet(KeysUtil.toKeys(this.clustererName));
    Clusterer clusterer = null;
    if (clusterers != null && !clusterers.isEmpty()) {
        clusterer = clusterers.get(0);
    }

    for (TridentTuple tuple : tuples) {
        if (clusterer == null) {
            // No model: keep the result aligned with the batch instead of returning a short list.
            clusterIndexes.add(null);
        } else {
            Instance<?> instance = (Instance<?>) tuple.get(0);
            clusterIndexes.add(clusterer.classify(instance.features));
        }
    }

    return clusterIndexes;
}
/**
 * Classifies the text instance in field 0 of every tuple with the classifier stored under
 * {@code classifierName}.
 * <p>
 * The returned list always has exactly one entry per input tuple, because Trident's
 * state-query contract expects the result list to line up with the input batch. When no
 * classifier is available (the lookup returns null, an empty list, or a null entry), each
 * entry is {@code null} instead of a label.
 *
 * @param state  map state holding the text classifier
 * @param tuples batch of tuples, each carrying a {@code TextInstance} in field 0
 * @return one label (or {@code null} when no model is stored) per tuple, in order
 */
@SuppressWarnings("unchecked")
@Override
public List<L> batchRetrieve(MapState<TextClassifier<L>> state, List<TridentTuple> tuples) {
    List<L> labels = new ArrayList<L>(tuples.size());

    // The stored entry itself may be null when nothing has been saved under the key yet.
    List<TextClassifier<L>> classifiers = state.multiGet(KeysUtil.toKeys(this.classifierName));
    TextClassifier<L> classifier = null;
    if (classifiers != null && !classifiers.isEmpty()) {
        classifier = classifiers.get(0);
    }

    for (TridentTuple tuple : tuples) {
        if (classifier == null) {
            // No model: keep the result aligned with the batch instead of returning a short list.
            labels.add(null);
        } else {
            TextInstance<L> instance = (TextInstance<L>) tuple.get(0);
            labels.add(classifier.classify(instance.tokens));
        }
    }

    return labels;
}
/**
 * Normalizes the instance in field 0 and emits the result.
 *
 * @param tuple     tuple carrying an {@code Instance} in field 0
 * @param collector receives the instance returned by {@code normalize}
 */
@Override
public void execute(TridentTuple tuple, TridentCollector collector) {
    final Instance<?> original = (Instance<?>) tuple.get(0);
    collector.emit(new Values(this.normalize(original)));
}
/**
 * Keeps a tuple only when the {@code Status} in field 0 has a place and that place
 * has a country code.
 *
 * @param tuple the tuple to test
 * @return {@code true} if both {@code getPlace()} and its {@code getCountryCode()} are non-null
 */
@Override
public boolean isKeep(TridentTuple tuple) {
    final Status tweet = (Status) tuple.get(0);
    return tweet.getPlace() != null
            && tweet.getPlace().getCountryCode() != null;
}
}
@Override public void updateState(MapState<Clusterer> state, List<TridentTuple> tuples, TridentCollector collector) { // Get model List<Clusterer> clusterers = state.multiGet(KeysUtil.toKeys(this.clustererName)); Clusterer clusterer = null; if (clusterers != null && !clusterers.isEmpty()) { clusterer = clusterers.get(0); } // Init it if necessary if (clusterer == null) { clusterer = this.initialClusterer; } // Update model Instance<?> instance; for (TridentTuple tuple : tuples) { instance = (Instance<?>) tuple.get(0); clusterer.update(instance.features); } // Save model state.multiPut(KeysUtil.toKeys(this.clustererName), Arrays.asList(clusterer)); }
@SuppressWarnings("unchecked") @Override public void updateState(MapState<Regressor> state, List<TridentTuple> tuples, TridentCollector collector) { // Get model List<Regressor> regressors = state.multiGet(KeysUtil.toKeys(this.classifierName)); Regressor regressor = null; if (regressors != null && !regressors.isEmpty()) { regressor = regressors.get(0); } // Init it if necessary if (regressor == null) { regressor = this.initialRegressor; } // Update model Instance<Double> instance; for (TridentTuple tuple : tuples) { instance = (Instance<Double>) tuple.get(0); regressor.update(instance.label, instance.features); } // Save model state.multiPut(KeysUtil.toKeys(this.classifierName), Arrays.asList(regressor)); }
/**
 * Parses the raw tweet JSON in field 0 and emits one tuple per extracted content item.
 * <p>
 * Each emitted tuple holds the parsed status, the content item and the status' user.
 * The {@code ContentExtracter} is created on first use.
 *
 * @param tuple     tuple carrying the raw tweet JSON string in field 0
 * @param collector receives one {@code (status, content, user)} value set per content item
 */
@Override
public void execute(TridentTuple tuple, TridentCollector collector) {
    // Create the extracter lazily, on the first call
    if (extracter == null) {
        extracter = new ContentExtracter();
    }

    final String json = (String) tuple.get(0);
    final Status status = parse(json);
    final User author = status.getUser();

    for (Content item : extracter.extract(status)) {
        collector.emit(new Values(status, item, author));
    }
}