/**
 * Returns the entry of {@code termKeyFreqs} found at position {@code i}.
 *
 * @param i the position handed to {@code termKeyFreqs.get}
 * @return the value read from {@code termKeyFreqs}
 */
// NOTE(review): the second closing brace ends an enclosing scope that opens outside this view.
public double getKeyFrequency(int i) { return termKeyFreqs.get(i); } }
/**
 * Classifies a document by descending the classifier tree rooted at {@code node}.
 * <p>
 * The node's classifier is applied to the document; its first score is compared
 * (strictly greater) against {@code _range.border} to pick the positive or the
 * negative side. If that side has a child node the descent continues there,
 * otherwise the result is turned into a leaf answer: its category list is replaced
 * by the first category of the chosen side and, on the negative side only, the
 * first score is negated.
 *
 * @param index the index containing the document
 * @param docID the document to classify
 * @param node  the tree node to start from
 * @return the result produced at the leaf that was reached
 */
private ClassificationResult classify(IIndex index, int docID, TreeNode node) {
    ClassificationResult outcome = node.getClassifier().classify(index, docID);
    boolean positiveSide = outcome.score.get(0) > _range.border;

    // Keep descending while the chosen side still has a subtree.
    TreeNode next = positiveSide ? node.getPositiveChild() : node.getNegativeChild();
    if (next != null) {
        return classify(index, docID, next);
    }

    // Leaf reached: rewrite the result so it names the category of the chosen side.
    if (!positiveSide) {
        outcome.score.setQuick(0, -outcome.score.getQuick(0));
    }
    outcome.categoryID.clear();
    if (positiveSide) {
        outcome.categoryID.add(node.getPositiveCategories()[0]);
    } else {
        outcome.categoryID.add(node.getNegativeCategories()[0]);
    }
    return outcome;
}
/**
 * Returns the entry of {@code termKeyFreqs} found at position {@code i}.
 *
 * @param i the position handed to {@code termKeyFreqs.get}
 * @return the value read from {@code termKeyFreqs}
 */
// NOTE(review): the second closing brace ends an enclosing scope that opens outside this view.
public double getKeyFrequency(int i) { return termKeyFreqs.get(i); } }
/**
 * Deletes the element located at <tt>offset</tt> and hands it back to the caller.
 *
 * @param offset an <code>int</code> value
 * @return the value that was stored at offset before the removal.
 */
public double remove(int offset) {
    // Read the element first; the ranged remove below discards it.
    final double removed = get(offset);
    remove(offset, 1);
    return removed;
}
public void write(String outDir, CotrainOutputData data) throws Exception { java.io.File f = new java.io.File(outDir); f.mkdirs(); String fname = outDir + Os.pathSeparator() + "cotraining.db"; DataOutputStream os = new DataOutputStream(new BufferedOutputStream( new FileOutputStream(fname))); os.writeInt(data.catsThreshold.size()); for (int i = 0; i < data.catsThreshold.size(); i++) { os.writeDouble(data.catsThreshold.get(i)); } // Close the stream. os.close(); }
/**
 * Combines the per-classifier results of a committee into one weighted result.
 * <p>
 * The returned result carries the categories of the first entry of
 * {@code results}. For every position {@code j} its score is the sum, over all
 * entries {@code i} of {@code results}, of that entry's score at {@code j}
 * multiplied by the weight the {@code i}-th matrix returns for the entry's
 * category at {@code j} and {@code docID}.
 * <p>
 * The entries of {@code results} are only read; the accumulated values are stored
 * in the returned object.
 *
 * @param cl        the committee classifier (not used by this implementation)
 * @param results   one result per committee member, in the same order as the matrixes
 * @param testIndex the index being classified (not used by this implementation)
 * @param docID     the document the results refer to
 * @return the weighted combination; empty (apart from the document ID) when
 *         {@code results} is empty
 * @throws RuntimeException if the number of results differs from the number of matrixes
 */
public ClassificationResult computeScore(KnnCommitteeClassifier cl,
        Vector<ClassificationResult> results, IIndex testIndex, int docID) {
    if (results.size() != _matrixes.size())
        throw new RuntimeException("The number of matrixes and classifiers must be the same");

    ClassificationResult cr = new ClassificationResult();
    cr.documentID = docID;
    if (results.isEmpty()) {
        // Nothing to combine; avoids indexing into an empty vector below.
        return cr;
    }

    // Start every category of the first result with a zero score.
    ClassificationResult first = results.get(0);
    for (int i = 0; i < first.categoryID.size(); i++) {
        cr.categoryID.add(first.categoryID.get(i));
        cr.score.add(0);
    }

    for (int i = 0; i < results.size(); i++) {
        ClassificationResult res = results.get(i);
        for (int j = 0; j < res.score.size(); j++) {
            double weight = _matrixes.get(i).getWeight(res.categoryID.get(j), docID, 0);
            double val = cr.score.get(j) + (res.score.get(j) * weight);
            // Accumulate into the combined result. Writing into res instead would
            // leave the returned scores at zero and corrupt the caller's inputs.
            cr.score.set(j, val);
        }
    }
    return cr;
}
protected void hierarchicallyClassification(short catID, IIndex idx, int docID, ClassificationResult res) { IClassifier c = (IClassifier) _map.get(catID); if (c == null) // No more levels in the hierarchy. return; // Call recursively hierarchical classifier. ClassificationResult r = c.classify(idx, docID); double maxScore = Double.MIN_VALUE; short maxRealCatID = -1; for (short i = 0; i < r.categoryID.size(); i++) { short curCatID = r.categoryID.get(i); TreeBoostClassifierAddress addr = new TreeBoostClassifierAddress(); addr.level = catID; addr.categoryID = curCatID; short realCatID = _mapCatLevel.get2(addr); assert (realCatID != -1); if (r.score.get(i) > maxScore) { maxScore = r.score.get(i); maxRealCatID = realCatID; } if (!res.categoryID.contains(realCatID)) { res.categoryID.add(realCatID); res.score.add(r.score.get(i)); } } hierarchicallyClassification(maxRealCatID, idx, docID, res); }
// Take the next document from the iterator and look up its result by document ID.
// score is the first score of that result shifted by cr.border; realPositive tells
// whether the validation classification DB assigns category 0 to the document.
// NOTE(review): fragment of a larger method — docs, results, cr and validation are defined outside this view.
int docID = docs.next(); ClassificationResult res = results[docID]; double score = res.score.get(0) - cr.border; boolean realPositive = validation.getClassificationDB() .hasDocumentCategory(docID, (short) 0);
/**
 * Classifies {@code test} one category at a time and records the accepted
 * categories in {@code builder}.
 * <p>
 * For each category of the test index the classifier returns an array of results.
 * Whenever the first score of a result reaches the border of the classifier range
 * for the result's first category, the category is recorded under {@code idx}.
 *
 * @param test       the index to classify
 * @param classifier the classifier to apply
 * @param builder    receives the accepted assignments
 * @param idx        the ID passed to {@code builder.setDocumentCategory} for every hit
 */
protected void classifyPerCategory(IIndex test, IClassifier classifier,
        IClassificationDBBuilder builder, int idx) {
    // NOTE(review): every hit is stored under idx, not under the document ID of the
    // individual result — confirm against the callers that this is intended.
    IShortIterator categories = test.getCategoryDB().getCategories();
    for (categories.begin(); categories.hasNext();) {
        short catID = categories.next();
        for (ClassificationResult res : classifier.classify(test, catID)) {
            ClassifierRange range = classifier.getClassifierRange(res.categoryID.get(0));
            boolean accepted = res.score.get(0) >= range.border;
            if (accepted) {
                builder.setDocumentCategory(idx, catID);
            }
        }
    }
}
/**
 * Classifies {@code test} one document at a time and records the accepted
 * categories in {@code builder}.
 * <p>
 * Each document of the test index is classified once. Every category in the
 * result whose score reaches the border of the classifier range for that category
 * is recorded under {@code idx}.
 *
 * @param test       the index to classify
 * @param classifier the classifier to apply
 * @param builder    receives the accepted assignments
 * @param idx        the ID passed to {@code builder.setDocumentCategory} for every hit
 */
protected void classifyPerDocument(IIndex test, IClassifier classifier,
        IClassificationDBBuilder builder, int idx) {
    // NOTE(review): every hit is stored under idx, not under the document being
    // classified — confirm against the callers that this is intended.
    IIntIterator documents = test.getDocumentDB().getDocuments();
    for (documents.begin(); documents.hasNext();) {
        int docID = documents.next();
        ClassificationResult res = classifier.classify(test, docID);
        for (int pos = 0; pos < res.categoryID.size(); pos++) {
            ClassifierRange range = classifier.getClassifierRange(res.categoryID.get(pos));
            boolean accepted = res.score.get(pos) >= range.border;
            if (accepted) {
                builder.setDocumentCategory(idx, res.categoryID.get(pos));
            }
        }
    }
}
/**
 * Scores every document of {@code testIndex} against the single category
 * {@code catID}.
 * <p>
 * Each document is classified with the per-document {@code classify} overload.
 * If the category appears in that result its score is used; otherwise the
 * {@code minimum} of the classifier range for the category is used. The returned
 * array is indexed by document ID and sized with the document count of the index.
 *
 * @param testIndex the index whose documents are classified
 * @param catID     the category of interest
 * @return one single-category result per document, stored at the document's ID
 */
public ClassificationResult[] classify(IIndex testIndex, short catID) {
    int documentCount = testIndex.getDocumentDB().getDocumentsCount();
    ClassificationResult[] perDocument = new ClassificationResult[documentCount];

    IIntIterator documents = testIndex.getDocumentDB().getDocuments();
    while (documents.hasNext()) {
        int docID = documents.next();
        ClassificationResult full = classify(testIndex, docID);

        // Position of the requested category in the full result, or -1 if absent.
        int match = -1;
        for (int pos = 0; pos < full.categoryID.size(); pos++) {
            short cat = full.categoryID.get(pos);
            if (cat == catID) {
                match = pos;
                break;
            }
        }

        boolean found = match >= 0;
        if (found || perDocument[docID] == null) {
            ClassificationResult single = new ClassificationResult();
            single.documentID = docID;
            single.categoryID.add(catID);
            if (found) {
                single.score.add(full.score.get(match));
            } else {
                // Category not reported: fall back to the range minimum.
                single.score.add(getClassifierRange(catID).minimum);
            }
            perDocument[docID] = single;
        }
    }
    return perDocument;
}
/**
 * Scores every document of {@code testIndex} against the single category
 * {@code catID}.
 * <p>
 * Each document is classified with the per-document {@code classify} overload.
 * If the category appears in that result its score is used; otherwise the
 * {@code minimum} of the classifier range for the category is used. The returned
 * array is indexed by document ID and sized with the document count of the index.
 *
 * @param testIndex the index whose documents are classified
 * @param catID     the category of interest
 * @return one single-category result per document, stored at the document's ID
 */
public ClassificationResult[] classify(IIndex testIndex, short catID) {
    int documentCount = testIndex.getDocumentDB().getDocumentsCount();
    ClassificationResult[] perDocument = new ClassificationResult[documentCount];

    IIntIterator documents = testIndex.getDocumentDB().getDocuments();
    while (documents.hasNext()) {
        int docID = documents.next();
        ClassificationResult full = classify(testIndex, docID);

        // Position of the requested category in the full result, or -1 if absent.
        int match = -1;
        for (int pos = 0; pos < full.categoryID.size(); pos++) {
            short cat = full.categoryID.get(pos);
            if (cat == catID) {
                match = pos;
                break;
            }
        }

        boolean found = match >= 0;
        if (found || perDocument[docID] == null) {
            ClassificationResult single = new ClassificationResult();
            single.documentID = docID;
            single.categoryID.add(catID);
            if (found) {
                single.score.add(full.score.get(match));
            } else {
                // Category not reported: fall back to the range minimum.
                single.score.add(getClassifierRange(catID).minimum);
            }
            perDocument[docID] = single;
        }
    }
    return perDocument;
}
// Record catID for the result's document when its first score reaches the range border.
// NOTE(review): fragment of a larger method — res, cr, builder and catID are defined outside this view.
if (res.score.get(0) >= cr.border) builder.setDocumentCategory(res.documentID, catID);
// Record the i-th category of the result for docID when its score reaches the range border.
// NOTE(review): fragment of a larger method — res, cr, builder, docID and i are defined outside this view.
if (res.score.get(i) >= cr.border) builder.setDocumentCategory(docID, res.categoryID.get(i));
protected void classifyPerCategory() { IShortIterator cats = index().getCategoryDB().getCategories(); TextualProgressBar bar = new TextualProgressBar("Classify documents"); bar.signal(0); int numComputed = 0; int numToCompute = index().getCategoryDB().getCategoriesCount(); _lastClassification = new ClassificationScoreDB(index().getDocumentDB().getDocumentsCount()); cats.begin(); while (cats.hasNext()) { short catID = cats.next(); ClassificationResult[] r = _classifier.classify(index(), catID); for (int i = 0; i < r.length; i++) { ClassificationResult res = r[i]; ClassifierRange cr = _classifier.getClassifierRange(res.categoryID.get(0)); _lastClassification.insertScore(res.documentID, catID, res.score.get(0), cr); } numComputed++; // Signal the status of actual operation. bar.signal((numComputed * 100) / numToCompute); } bar.signal(100); }
protected void classifyPerDocument() { IIntIterator docs = index().getDocumentDB().getDocuments(); TextualProgressBar bar = new TextualProgressBar("Classify documents"); bar.signal(0); if (_listener != null) _listener.operationStatus(0); int numComputed = 0; int numToCompute = index().getDocumentDB().getDocumentsCount(); _lastClassification = new ClassificationScoreDB(index().getDocumentDB().getDocumentsCount()); docs.begin(); while (docs.hasNext()) { int docID = docs.next(); ClassificationResult res = _classifier.classify(index(), docID); for (int i = 0; i < res.categoryID.size(); i++) { ClassifierRange cr = _classifier.getClassifierRange(res.categoryID.get(i)); _lastClassification.insertScore(docID, res.categoryID.get(i), res.score.get(i), cr); } numComputed++; // Signal the status of actual operation. bar.signal((numComputed * 100) / numToCompute); if (_listener != null) _listener.operationStatus((numComputed * 100) / numToCompute); } bar.signal(100); if (_listener != null) _listener.operationStatus(100); }
/**
 * Builds a formula tree for one data point, evaluates it against this object and
 * returns the exponentiated result.
 * <p>
 * With {@code S} denoting the PLUS node over all
 * {@code TIMES(param_i, convertToLogDomain(datum[i]))} terms, the tree is
 * {@code PLUS(S, TIMES(convertToLogDomain(-1.0), LOG(PLUS(TIMES_IDENTITY, EXP(S)))))}.
 * Both {@code m_current} and {@code m_llcurrent} are reset to 0 before the tree is
 * built.
 *
 * @param datum the feature values; entry {@code i} is paired with parameter
 *              {@code "param_" + i}
 * @return the evaluated formula after {@code exponentiate()}
 */
private double generateTestVal(TDoubleArrayList datum) {
    // NOTE(review): the order of getFormulaObject calls is kept exactly as before;
    // presumably the counters reset here index into a pool of formula objects.
    m_current = 0;
    m_llcurrent = 0;

    LogFormula expNode = getFormulaObject(LogFormula.Op.EXP);
    LogFormula weightedSum = getFormulaObject(LogFormula.Op.PLUS);

    for (int feature = 0; feature < datum.size(); feature++) {
        String paramName = "param_" + feature;

        LogFormula product = getFormulaObject(LogFormula.Op.TIMES);
        int paramId = A.getInt(paramName);
        LogFormula paramLookup = getLazyLookupFormulaObjectCustom(paramId, paramName);
        product.add_arg(paramLookup);
        product.add_arg(getFormulaObject(LDouble.convertToLogDomain(datum.get(feature))));

        weightedSum.add_arg(product);
    }
    expNode.add_arg(weightedSum);

    // LOG(PLUS(TIMES_IDENTITY, EXP(weightedSum)))
    LogFormula logNode = getFormulaObject(LogFormula.Op.LOG);
    LogFormula identityPlusExp = getFormulaObject(LogFormula.Op.PLUS);
    identityPlusExp.add_arg(getFormulaObject(IdentityElement.TIMES_IDENTITY));
    identityPlusExp.add_arg(expNode);
    logNode.add_arg(identityPlusExp);

    // PLUS(weightedSum, TIMES(-1, logNode))
    LogFormula total = getFormulaObject(LogFormula.Op.PLUS);
    LogFormula negatedLog = getFormulaObject(LogFormula.Op.TIMES);
    negatedLog.add_arg(getFormulaObject(LDouble.convertToLogDomain(-1.0)));
    negatedLog.add_arg(logNode);
    total.add_arg(weightedSum);
    total.add_arg(negatedLog);

    return total.evaluate(this).exponentiate();
}
/**
 * Builds a formula tree for one data point, evaluates it against this object and
 * returns the exponentiated result.
 * <p>
 * With {@code S} denoting the PLUS node over all
 * {@code TIMES(param_i, convertToLogDomain(datum[i]))} terms, the tree is
 * {@code PLUS(S, TIMES(convertToLogDomain(-1.0), LOG(PLUS(TIMES_IDENTITY, EXP(S)))))}.
 * Both {@code m_current} and {@code m_llcurrent} are reset to 0 before the tree is
 * built.
 *
 * @param datum the feature values; entry {@code i} is paired with parameter
 *              {@code "param_" + i}
 * @return the evaluated formula after {@code exponentiate()}
 */
private double generateTestVal(TDoubleArrayList datum) {
    // NOTE(review): the order of getFormulaObject calls is kept exactly as before;
    // presumably the counters reset here index into a pool of formula objects.
    m_current = 0;
    m_llcurrent = 0;

    LogFormula expNode = getFormulaObject(LogFormula.Op.EXP);
    LogFormula weightedSum = getFormulaObject(LogFormula.Op.PLUS);

    for (int feature = 0; feature < datum.size(); feature++) {
        String paramName = "param_" + feature;

        LogFormula product = getFormulaObject(LogFormula.Op.TIMES);
        int paramId = A.getInt(paramName);
        LogFormula paramLookup = getLazyLookupFormulaObjectCustom(paramId, paramName);
        product.add_arg(paramLookup);
        product.add_arg(getFormulaObject(LDouble.convertToLogDomain(datum.get(feature))));

        weightedSum.add_arg(product);
    }
    expNode.add_arg(weightedSum);

    // LOG(PLUS(TIMES_IDENTITY, EXP(weightedSum)))
    LogFormula logNode = getFormulaObject(LogFormula.Op.LOG);
    LogFormula identityPlusExp = getFormulaObject(LogFormula.Op.PLUS);
    identityPlusExp.add_arg(getFormulaObject(IdentityElement.TIMES_IDENTITY));
    identityPlusExp.add_arg(expNode);
    logNode.add_arg(identityPlusExp);

    // PLUS(weightedSum, TIMES(-1, logNode))
    LogFormula total = getFormulaObject(LogFormula.Op.PLUS);
    LogFormula negatedLog = getFormulaObject(LogFormula.Op.TIMES);
    negatedLog.add_arg(getFormulaObject(LDouble.convertToLogDomain(-1.0)));
    negatedLog.add_arg(logNode);
    total.add_arg(weightedSum);
    total.add_arg(negatedLog);

    return total.evaluate(this).exponentiate();
}
// Record the i-th category of the result for docID when its score reaches the range border.
// NOTE(review): fragment of a larger method — res, cr, builder, docID and i are defined outside this view.
if (res.score.get(i) >= cr.border) builder.setDocumentCategory(docID, res.categoryID.get(i));