/**
 * Converts a parallel array of counts into a {@code Counter}, naming each
 * array position with the corresponding element of {@code index}.
 * Zero-valued entries are skipped so the resulting counter stays sparse.
 *
 * @param counts the count for each index position
 * @param index  supplies the key for each array slot; must have at least
 *               {@code counts.length} entries
 * @return a counter mapping {@code index.get(i)} to {@code counts[i]} for all nonzero counts
 * @throws IllegalArgumentException if the index is smaller than the array
 */
public static <T> Counter<T> toCounter(double[] counts, Index<T> index) {
  if (index.size() < counts.length) {
    throw new IllegalArgumentException("Index not large enough to name all the array elements!");
  }
  Counter<T> counter = new ClassicCounter<>();
  for (int pos = 0; pos < counts.length; pos++) {
    double value = counts[pos];
    if (value != 0.0) {
      counter.setCount(index.get(pos), value);
    }
  }
  return counter;
}
public EntityCachingAbstractSequencePrior(String backgroundSymbol, Index<String> classIndex, List<IN> doc) { this.classIndex = classIndex; this.backgroundSymbol = classIndex.indexOf(backgroundSymbol); this.numClasses = classIndex.size(); this.possibleValues = new int[numClasses]; for (int i=0; i<numClasses; i++) { possibleValues[i] = i; } this.doc = doc; }
// NOTE(review): garbled excerpt — disjoint statements from several different methods appear
// fused on one line (class-index initialization, a blank-answer validation throw, feature/template
// index wiring, and CRF label-window expansion). Braces do not balance; the enclosing method
// bodies lie outside this view, so the code is kept byte-identical rather than restructured.
classIndex = new HashIndex<>(); classIndex.add(flags.backgroundSymbol); throw new IllegalArgumentException("Word " + wordCount + " (\"" + token.get(CoreAnnotations.TextAnnotation.class) + "\") has a blank answer"); classIndex.add(ans); if (labelDictionary != null) { String observation = token.get(CoreAnnotations.TextAnnotation.class); labelIndex.add(d.label()); Index<Integer> featureIndexMap = new HashIndex<>(); featureIndex.addAll(featureIndices[i]); for (String str : featureIndices[i]) { int index = featureIndex.indexOf(str); map[index] = i; featureIndexMap.add(index); int groupIndex = templateGroupIndex.addToIndex(groupSuffix); featureIndexToTemplateIndex.put(index, groupIndex); for (int i = 0, liSize = labelIndex.size(); i < liSize; i++) { CRFLabel label = labelIndex.get(i); for (int j = windowSize - 2; j >= 0; j--) { label = label.getOneSmallerLabel(); labelIndices.get(j).add(label);
// NOTE(review): garbled excerpt — statements from at least two scorer methods fused on one line
// (label-index union, per-class TP/FP/FN counter allocation, and per-datum guess/gold lookups).
// The enclosing method bodies are outside this view; code kept byte-identical.
labelIndex = data.labelIndex; for (int i = 0; i < data.size(); i++) { labels.add(labelIndex.get(labelsArr[i])); labelIndex.addAll(data.labelIndex().objectsList()); labelIndex.addAll(classifier.labels()); int numClasses = labelIndex.size(); tpCount = new int[numClasses]; fpCount = new int[numClasses]; fnCount = new int[numClasses]; negIndex = labelIndex.indexOf(negLabel); int guessIndex = labelIndex.indexOf(guess); L label = labels.get(i); int trueIndex = labelIndex.indexOf(label);
public Classifier<L, F> trainClassifier(Iterable<Datum<L, F>> dataIterable) { Minimizer<DiffFunction> minimizer = getMinimizer(); Index<F> featureIndex = Generics.newIndex(); Index<L> labelIndex = Generics.newIndex(); for (Datum<L, F> d : dataIterable) { labelIndex.add(d.label()); featureIndex.addAll(d.asFeatures());//If there are duplicates, it doesn't add them again. } logger.info(String.format("Training linear classifier with %d features and %d labels", featureIndex.size(), labelIndex.size())); LogConditionalObjectiveFunction<L, F> objective = new LogConditionalObjectiveFunction<>(dataIterable, logPrior, featureIndex, labelIndex); // [cdm 2014] Commented out next line. Why not use the logPrior set up previously and used at creation??? // objective.setPrior(new LogPrior(LogPrior.LogPriorType.QUADRATIC)); double[] initial = objective.initial(); double[] weights = minimizer.minimize(objective, TOL, initial); LinearClassifier<L, F> classifier = new LinearClassifier<>(objective.to2D(weights), featureIndex, labelIndex); return classifier; }
/**
 * Scores the classifier on the given dataset and returns its F-measure.
 * Rebuilds {@code labelIndex} as the union of the classifier's labels and the
 * labels present in the data, then tallies one guess per datum.
 *
 * @param classifier the classifier under evaluation
 * @param data       the gold-labeled dataset
 * @return the F-measure accumulated over all datums
 */
public <F> double score(Classifier<L,F> classifier, GeneralDataset<L,F> data) {
  labelIndex = new HashIndex<>();
  labelIndex.addAll(classifier.labels());
  labelIndex.addAll(data.labelIndex.objectsList());
  clearCounts();
  int[] goldLabels = data.getLabelsArray();
  int size = data.size();
  for (int idx = 0; idx < size; idx++) {
    Datum<L, F> datum = data.getRVFDatum(idx);
    L guess = classifier.classOf(datum);
    addGuess(guess, labelIndex.get(goldLabels[idx]));
  }
  finalizeCounts();
  return getFMeasure();
}
// NOTE(review): truncated excerpt — the body below references `wordStr` and loop variable `i`
// before any visible declaration, so statements from the original method are missing from this
// view. Appears to map each input word to its index (falling back to Lexicon.UNKNOWN_WORD) and
// seed chart items from the lexicon's taggings — TODO confirm against the full source.
protected List<Item> makeInitialItems(List<? extends HasWord> wordList) { List<Item> itemList = new ArrayList<>(); int length = wordList.size(); int numTags = tagIndex.size(); words = new int[length]; taggedWordList = new List[length]; if (!wordIndex.contains(wordStr)) { wordStr = Lexicon.UNKNOWN_WORD; int word = wordIndex.indexOf(wordStr); words[i] = word; for (Iterator<IntTaggedWord> tagI = lex.ruleIteratorByWord(word, i, wordContextStr); tagI.hasNext(); ) { int state = stateIndex.indexOf(tagIndex.get(tag));
/**
 * Renders a human-readable summary of this dataset: datum count, the full label
 * set, and feature type/token statistics.
 *
 * @return the multi-line summary text
 */
public String toSummaryString() {
  StringWriter sw = new StringWriter();
  PrintWriter pw = new PrintWriter(sw);
  pw.println("Number of data points: " + size());
  pw.print("Number of labels: " + labelIndex.size() + " [");
  // Comma-separate the labels without a trailing separator.
  String sep = "";
  for (L label : labelIndex) {
    pw.print(sep);
    pw.print(label);
    sep = ", ";
  }
  pw.println("]");
  pw.println("Number of features (Phi(X) types): " + featureIndex.size());
  pw.println("Number of active feature types: " + numFeatureTypes());
  pw.println("Number of active feature tokens: " + numFeatureTokens());
  return sw.toString();
}
// NOTE(review): truncated excerpt — the nested feature/label loop bodies are missing, and the
// writer is never closed in this view (presumably handled by code outside the excerpt — confirm;
// a try-with-resources would be the modern form in the full method). Code kept byte-identical.
// The inner loop variable `labelIndex` shadows the field of the same name used two statements up.
BufferedWriter out = new BufferedWriter(new FileWriter(tgtFile)); labelIndex.saveToWriter(out); featureIndex.saveToWriter(out); int numLabels = labelIndex.size(); int numFeatures = featureIndex.size(); for (int featIndex=0; featIndex<numFeatures; featIndex++) { for (int labelIndex=0;labelIndex<numLabels;labelIndex++) {
// NOTE(review): garbled excerpt of a toString/summary builder — the label iteration block is cut
// short (no closing of the while loop or "]" append visible) before the feature section starts,
// and braces do not balance. Enclosing method lies outside this view; code kept byte-identical.
sb.append("numDatums: ").append(size).append('\n'); sb.append("numDatumsPerLabel: ").append(this.numDatumsPerLabel()).append('\n'); sb.append("numLabels: ").append(labelIndex.size()).append(" ["); Iterator<L> iter = labelIndex.iterator(); while (iter.hasNext()) { sb.append(iter.next()); sb.append("numFeatures (Phi(X) types): ").append(featureIndex.size()).append(" ["); int sz = Math.min(5, featureIndex.size()); for (int i = 0; i < sz; i++) { if (i > 0) { sb.append(", "); sb.append(featureIndex.get(i)); if (sz < featureIndex.size()) { sb.append(", ...");
// NOTE(review): truncated constructor — the trailing statements reference `sb` with no visible
// declaration and the loop body never closes, so code between the per-state setup and the
// tag-string check is missing from this view. Kept byte-identical; appears to precompute, for
// each grammar state, left/right context lists keyed by tag — TODO confirm in full source.
public OutsideRuleFilter(BinaryGrammar bg, Index<String> stateIndex, Index<String> tagIndex) { this.tagIndex = tagIndex; int numStates = stateIndex.size(); numTags = tagIndex.size(); allocate(numStates); for (int state = 0; state < numStates; state++) { String stateStr = stateIndex.get(state); List<String> left = new ArrayList<>(); List<String> right = new ArrayList<>(); if (sb.length() > 0) { String str = sb.toString(); if (!tagIndex.contains(str)) { str = null;
/**
 * Looks up the inside score of the given state spanning the whole sentence
 * (start 0 to {@code length}).
 *
 * @param stateName the grammar state to look up
 * @return the chart score, or {@code Double.NEGATIVE_INFINITY} when the sentence
 *         exceeds the allocated chart, the state is unknown, or the score arrays
 *         were never (fully) filled in
 */
public double getBestScore(String stateName) {
  // Guard order matters: cheap bounds check first, then index membership.
  if (length > arraySize || !stateIndex.contains(stateName)) {
    return Double.NEGATIVE_INFINITY;
  }
  int goal = stateIndex.indexOf(stateName);
  boolean chartMissing = iScore == null
      || iScore.length == 0
      || iScore[0].length <= length
      || iScore[0][length].length <= goal;
  return chartMissing ? Double.NEGATIVE_INFINITY : iScore[0][length][goal];
}
// NOTE(review): truncated excerpt from tree extraction — the if-block never closes and the
// surrounding method (with `goal`, `start`, `end`, `words` in scope) is outside this view.
// Builds a leaf/preterminal pair for a single-word span whose goal state is a tag, provided
// the lexicon scores the tagging above -inf (or flood-tags mode is on). Code kept byte-identical.
String goalStr = stateIndex.get(goal); if (end - start == 1 && tagIndex.contains(goalStr)) { IntTaggedWord tagging = new IntTaggedWord(words[start], tagIndex.indexOf(goalStr)); String contextStr = getCoreLabel(start).originalText(); float tagScore = lex.score(tagging, start, wordIndex.get(words[start]), contextStr); if (tagScore > Float.NEGATIVE_INFINITY || floodTags) { String wordStr = wordIndex.get(words[start]); Tree wordNode = tf.newLeaf(wordStr); Tree tagNode = tf.newTreeNode(goalStr, Collections.singletonList(wordNode));
// NOTE(review): truncated excerpt from CRF model combination — the loop enclosing the
// labelIndices merge (variable `i`) and the code that uses the old/new size snapshots are
// missing from this view. Merges another CRF's class/feature/label indexes and known
// lowercase words into this model; the assert checks weights were sized for the pre-merge
// feature count. Code kept byte-identical.
this.classIndex.addAll(crf.classIndex.objectsList()); int oldNumFeatures1 = this.featureIndex.size(); int oldNumFeatures2 = crf.featureIndex.size(); int oldNumWeights1 = this.getNumWeights(); int oldNumWeights2 = crf.getNumWeights(); this.featureIndex.addAll(crf.featureIndex.objectsList()); this.knownLCWords.addAll(crf.knownLCWords); assert (weights.length == oldNumFeatures1); this.labelIndices.get(i).addAll(crf.labelIndices.get(i).objectsList()); int numFeatures = featureIndex.size(); int numWeights = getNumWeights(); long elapsedMs = timer.stop();
/**
 * Maps each object in the array to its position in {@code classIndex}.
 * Objects absent from the index map to whatever {@code indexOf} returns for
 * unknown elements (presumably -1 — confirm against the Index implementation).
 *
 * @param os the objects to translate
 * @return a parallel array of class-index positions
 */
private int[] objectArrayToIntArray(E[] os) {
  int[] indices = new int[os.length];
  int pos = 0;
  for (E obj : os) {
    indices[pos++] = classIndex.indexOf(obj);
  }
  return indices;
}
/**
 * Returns the labels known to this classifier, as given by the label index.
 * NOTE(review): whether the returned collection is a live view of the index or
 * a copy depends on {@code objectsList()} — callers should not assume mutability.
 */
@Override public Collection<L> labels() { return labelIndex.objectsList(); }