/** * Creates a transition-based parser using a MLP transition classifier. * @param jsc * @param classifierFileName * @param featureFrame */ public TransitionBasedParserMLP(JavaSparkContext jsc, String classifierFileName, FeatureFrame featureFrame) { this.featureFrame = featureFrame; this.classifier = TransitionClassifier.load(jsc, new Path(classifierFileName, "data").toString()); this.pipelineModel = PipelineModel.load(new Path(classifierFileName, "pipelineModel").toString()); this.transitionName = ((StringIndexerModel)pipelineModel.stages()[2]).labels(); String[] features = ((CountVectorizerModel)(pipelineModel.stages()[1])).vocabulary(); this.featureMap = new HashMap<String, Integer>(); for (int j = 0; j < features.length; j++) { this.featureMap.put(features[j], j); } }
@Override public StringIndexerModelInfo getModelInfo(final StringIndexerModel from) { final String[] labels = from.labels(); final Map<String, Double> labelToIndex = new HashMap<String, Double>(); for (int i = 0; i < labels.length; i++) { labelToIndex.put(labels[i], (double) i); } final StringIndexerModelInfo modelInfo = new StringIndexerModelInfo(); modelInfo.setLabelToIndex(labelToIndex); Set<String> inputKeys = new LinkedHashSet<String>(); inputKeys.add(from.getInputCol()); modelInfo.setInputKeys(inputKeys); Set<String> outputKeys = new LinkedHashSet<String>(); outputKeys.add(from.getOutputCol()); modelInfo.setOutputKeys(outputKeys); return modelInfo; }
@Override public StringIndexerModelInfo getModelInfo(final StringIndexerModel from, DataFrame df) { final String[] labels = from.labels(); final Map<String, Double> labelToIndex = new HashMap<String, Double>(); for (int i = 0; i < labels.length; i++) { labelToIndex.put(labels[i], (double) i); } final StringIndexerModelInfo modelInfo = new StringIndexerModelInfo(); modelInfo.setLabelToIndex(labelToIndex); Set<String> inputKeys = new LinkedHashSet<String>(); inputKeys.add(from.getInputCol()); modelInfo.setInputKeys(inputKeys); Set<String> outputKeys = new LinkedHashSet<String>(); outputKeys.add(from.getOutputCol()); modelInfo.setOutputKeys(outputKeys); return modelInfo; }
/** * Creates a conditional Markov model. * @param pipelineModel * @param weights * @param markovOrder */ public CMMModel(PipelineModel pipelineModel, Vector weights, MarkovOrder markovOrder, Map<String, Set<Integer>> tagDictionary) { this.pipelineModel = pipelineModel; this.contextExtractor = new ContextExtractor(markovOrder, Constants.REGEXP_FILE); this.weights = weights; this.tags = ((StringIndexerModel)(pipelineModel.stages()[2])).labels(); String[] features = ((CountVectorizerModel)(pipelineModel.stages()[1])).vocabulary(); featureMap = new HashMap<String, Integer>(); for (int j = 0; j < features.length; j++) { featureMap.put(features[j], j); } this.tagDictionary = tagDictionary; }
categories.addAll(Arrays.asList(transformer.labels()));