/**
 * Applies one unweighted online-learning update for a single document.
 *
 * Resolves the document's active feature ids against the {@code map} in scope and
 * forwards them to {@link #weightedOnlineLearning(int[], double, int)} with a
 * weight of 1.0 and the document's own class id.
 *
 * @param doc the document to learn from; its {@code classID} is used as the label
 */
public void onlineLearning(Document doc) {
    final int[] featureIds = doc.getActiveFid(map);
    weightedOnlineLearning(featureIds, 1.0, doc.classID);
}
/**
 * Switches this object from its word representation to feature ids.
 *
 * When {@code words} is present, the feature ids returned by
 * {@code getActiveFid(map)} are stored in {@code activeFeatures} and {@code words}
 * is then set to null. When {@code words} is already null, {@code activeFeatures}
 * is set to null instead.
 *
 * @param map the feature map handed to {@code getActiveFid}
 */
public void toCompactFeatureRep(FeatureMap map) {
    if (words != null) {
        // Compute the ids first: words is only released after the lookup.
        this.activeFeatures = getActiveFid(map);
        this.words = null;
    } else {
        activeFeatures = null;
    }
}
/**
 * Folds one weighted training example into the running counts.
 *
 * Adds {@code weight} to {@code sampleSize} and to {@code classCounts[classID]},
 * adds {@code weight * activeFeatures.length} to {@code fidCount}, and then, for
 * every entry of {@code activeFeatures} (repeats included), adds {@code weight} to
 * {@code weights[classID]} and {@code wordCounts[feature]} and calls
 * {@code updateFidCounts(feature, classID, weight)}.
 *
 * @param activeFeatures feature ids active in the example; must not be null
 * @param weight         amount added to each affected counter
 * @param classID        index of the example's class in the per-class arrays
 */
public void weightedOnlineLearning(int[] activeFeatures, double weight, int classID) {
    sampleSize += weight;
    classCounts[classID] += weight;
    fidCount += weight * activeFeatures.length;

    for (int pos = 0; pos < activeFeatures.length; pos++) {
        final int fid = activeFeatures[pos];
        // Accumulated once per feature occurrence, not once per example.
        weights[classID] += weight;
        wordCounts[fid] += weight;
        updateFidCounts(fid, classID, weight);
    }
}
public static void initTopicClassifier(String pathToTopicData, String[] fileNames, String[] _labelnames) { map = new FeatureMap(); labelnames = new String[_labelnames.length + 1]; labelnames[0] = "UNKNOWN"; for (int i = 0; i < _labelnames.length; i++) labelnames[1 + i] = _labelnames[i]; DocumentCollection docs = new DocumentCollection(); StopWords stops = new StopWords(pathToStopWords); for (int i = 0; i < fileNames.length; i++) docs.addDocuments(pathToTopicData + "/" + fileNames[i], i, stops, false, "\n\t -.,?<>;':\"[]{}\\|`~!@#$%^&*()_+=-0987654321`~"); map.addDocs(docs, 20, false); NfoldCrossvalidation cv = new NfoldCrossvalidation(docs, 5); cv.printNfoldCorrssvalidationNbAcc(fileNames.length, -1, 20); // System.exit(0); nb = new MemoryEfficientNB(docs, map, fileNames.length); }
/**
 * Builds the training set for one cross-validation round.
 *
 * Creates a new {@code DocumentCollection} and adds the documents of every fold
 * except the one at index {@code foldId}. If {@code foldId} matches no fold index,
 * the documents of all folds are added.
 *
 * @param foldId index of the fold to leave out
 * @return a new collection holding the documents of the remaining folds
 */
public DocumentCollection getTrain(int foldId) {
    DocumentCollection training = new DocumentCollection();
    for (int foldIdx = 0; foldIdx < folds.length; foldIdx++) {
        if (foldIdx == foldId) {
            continue; // held-out fold
        }
        training.addDocuments(folds[foldIdx].docs);
    }
    return training;
}
/**
 * Renders the positive prediction confidences for a document as a feature string.
 *
 * Calls {@code getPredictionConfidence(d)} and, for each class index {@code i} in
 * {@code [0, classesN)} whose confidence is strictly greater than zero, appends
 * {@code methodName + i + "(" + confidence + ") "} to the result. Classes with a
 * confidence of zero or below are omitted; the result is the empty string when no
 * class qualifies.
 *
 * @param d the document to score
 * @return the space-terminated entries concatenated in class-index order
 */
public String getExtendedFeatures(Document d) {
    double[] conf = getPredictionConfidence(d);
    // StringBuilder avoids re-copying the accumulated text on every append.
    StringBuilder res = new StringBuilder();
    for (int i = 0; i < classesN; i++) {
        if (conf[i] > 0) {
            res.append(methodName + i + "(" + conf[i] + ") ");
        }
    }
    return res.toString();
}
} // closes the enclosing type, whose header lies outside this excerpt
/**
 * Creates a classifier by delegating all setup to
 * {@code allocateSpace(_map, _classesN)}.
 *
 * @param _map      feature map forwarded to {@code allocateSpace}
 * @param _classesN class count forwarded to {@code allocateSpace}
 */
public MemoryEfficientNB(
        FeatureMap _map,
        int _classesN) {
    allocateSpace(_map, _classesN);
}
public static void initTopicClassifier(String pathToTopicData, String[] fileNames, String[] _labelnames) { map = new FeatureMap(); labelnames = new String[_labelnames.length + 1]; labelnames[0] = "UNKNOWN"; for (int i = 0; i < _labelnames.length; i++) labelnames[1 + i] = _labelnames[i]; DocumentCollection docs = new DocumentCollection(); StopWords stops = new StopWords(pathToStopWords); for (int i = 0; i < fileNames.length; i++) docs.addDocuments(pathToTopicData + "/" + fileNames[i], i, stops, false, "\n\t -.,?<>;':\"[]{}\\|`~!@#$%^&*()_+=-0987654321`~"); map.addDocs(docs, 20, false); NfoldCrossvalidation cv = new NfoldCrossvalidation(docs, 5); cv.printNfoldCorrssvalidationNbAcc(fileNames.length, -1, 20); // System.exit(0); nb = new MemoryEfficientNB(docs, map, fileNames.length); }
/**
 * Applies one unweighted online-learning update for a single document.
 *
 * Resolves the document's active feature ids against the {@code map} in scope and
 * forwards them to {@link #weightedOnlineLearning(int[], double, int)} with a
 * weight of 1.0 and the document's own class id.
 *
 * @param doc the document to learn from; its {@code classID} is used as the label
 */
public void onlineLearning(Document doc) {
    final int[] featureIds = doc.getActiveFid(map);
    weightedOnlineLearning(featureIds, 1.0, doc.classID);
}
/**
 * Builds the training set for one cross-validation round.
 *
 * Creates a new {@code DocumentCollection} and adds the documents of every fold
 * except the one at index {@code foldId}. If {@code foldId} matches no fold index,
 * the documents of all folds are added.
 *
 * @param foldId index of the fold to leave out
 * @return a new collection holding the documents of the remaining folds
 */
public DocumentCollection getTrain(int foldId) {
    DocumentCollection training = new DocumentCollection();
    for (int foldIdx = 0; foldIdx < folds.length; foldIdx++) {
        if (foldIdx == foldId) {
            continue; // held-out fold
        }
        training.addDocuments(folds[foldIdx].docs);
    }
    return training;
}
/**
 * Switches this object from its word representation to feature ids.
 *
 * When {@code words} is present, the feature ids returned by
 * {@code getActiveFid(map)} are stored in {@code activeFeatures} and {@code words}
 * is then set to null. When {@code words} is already null, {@code activeFeatures}
 * is set to null instead.
 *
 * @param map the feature map handed to {@code getActiveFid}
 */
public void toCompactFeatureRep(FeatureMap map) {
    if (words != null) {
        // Compute the ids first: words is only released after the lookup.
        this.activeFeatures = getActiveFid(map);
        this.words = null;
    } else {
        activeFeatures = null;
    }
}
/**
 * Folds one weighted training example into the running counts.
 *
 * Adds {@code weight} to {@code sampleSize} and to {@code classCounts[classID]},
 * adds {@code weight * activeFeatures.length} to {@code fidCount}, and then, for
 * every entry of {@code activeFeatures} (repeats included), adds {@code weight} to
 * {@code weights[classID]} and {@code wordCounts[feature]} and calls
 * {@code updateFidCounts(feature, classID, weight)}.
 *
 * @param activeFeatures feature ids active in the example; must not be null
 * @param weight         amount added to each affected counter
 * @param classID        index of the example's class in the per-class arrays
 */
public void weightedOnlineLearning(int[] activeFeatures, double weight, int classID) {
    sampleSize += weight;
    classCounts[classID] += weight;
    fidCount += weight * activeFeatures.length;

    for (int pos = 0; pos < activeFeatures.length; pos++) {
        final int fid = activeFeatures[pos];
        // Accumulated once per feature occurrence, not once per example.
        weights[classID] += weight;
        wordCounts[fid] += weight;
        updateFidCounts(fid, classID, weight);
    }
}
/**
 * Renders the positive prediction confidences for a document as a feature string.
 *
 * Calls {@code getPredictionConfidence(d)} and, for each class index {@code i} in
 * {@code [0, classesN)} whose confidence is strictly greater than zero, appends
 * {@code methodName + i + "(" + confidence + ") "} to the result. Classes with a
 * confidence of zero or below are omitted; the result is the empty string when no
 * class qualifies.
 *
 * @param d the document to score
 * @return the space-terminated entries concatenated in class-index order
 */
public String getExtendedFeatures(Document d) {
    double[] conf = getPredictionConfidence(d);
    // StringBuilder avoids re-copying the accumulated text on every append.
    StringBuilder res = new StringBuilder();
    for (int i = 0; i < classesN; i++) {
        if (conf[i] > 0) {
            res.append(methodName + i + "(" + conf[i] + ") ");
        }
    }
    return res.toString();
}
} // closes the enclosing type, whose header lies outside this excerpt
/**
 * Creates a classifier by delegating all setup to
 * {@code allocateSpace(_map, _classesN)}.
 *
 * @param _map      feature map forwarded to {@code allocateSpace}
 * @param _classesN class count forwarded to {@code allocateSpace}
 */
public MemoryEfficientNB(
        FeatureMap _map,
        int _classesN) {
    allocateSpace(_map, _classesN);
}
public static void initTopicClassifier(String pathToTopicData, String[] fileNames, String[] _labelnames) { map = new FeatureMap(); labelnames = new String[_labelnames.length + 1]; labelnames[0] = "UNKNOWN"; for (int i = 0; i < _labelnames.length; i++) labelnames[1 + i] = _labelnames[i]; DocumentCollection docs = new DocumentCollection(); StopWords stops = new StopWords(pathToStopWords); for (int i = 0; i < fileNames.length; i++) docs.addDocuments(pathToTopicData + "/" + fileNames[i], i, stops, false, "\n\t -.,?<>;':\"[]{}\\|`~!@#$%^&*()_+=-0987654321`~"); map.addDocs(docs, 20, false); NfoldCrossvalidation cv = new NfoldCrossvalidation(docs, 5); cv.printNfoldCorrssvalidationNbAcc(fileNames.length, -1, 20); // System.exit(0); nb = new MemoryEfficientNB(docs, map, fileNames.length); }
/**
 * Applies one unweighted online-learning update for a single document.
 *
 * Resolves the document's active feature ids against the {@code map} in scope and
 * forwards them to {@link #weightedOnlineLearning(int[], double, int)} with a
 * weight of 1.0 and the document's own class id.
 *
 * @param doc the document to learn from; its {@code classID} is used as the label
 */
public void onlineLearning(Document doc) {
    final int[] featureIds = doc.getActiveFid(map);
    weightedOnlineLearning(featureIds, 1.0, doc.classID);
}
/**
 * Builds the training set for one cross-validation round.
 *
 * Creates a new {@code DocumentCollection} and adds the documents of every fold
 * except the one at index {@code foldId}. If {@code foldId} matches no fold index,
 * the documents of all folds are added.
 *
 * @param foldId index of the fold to leave out
 * @return a new collection holding the documents of the remaining folds
 */
public DocumentCollection getTrain(int foldId) {
    DocumentCollection training = new DocumentCollection();
    for (int foldIdx = 0; foldIdx < folds.length; foldIdx++) {
        if (foldIdx == foldId) {
            continue; // held-out fold
        }
        training.addDocuments(folds[foldIdx].docs);
    }
    return training;
}
/**
 * Switches this object from its word representation to feature ids.
 *
 * When {@code words} is present, the feature ids returned by
 * {@code getActiveFid(map)} are stored in {@code activeFeatures} and {@code words}
 * is then set to null. When {@code words} is already null, {@code activeFeatures}
 * is set to null instead.
 *
 * @param map the feature map handed to {@code getActiveFid}
 */
public void toCompactFeatureRep(FeatureMap map) {
    if (words != null) {
        // Compute the ids first: words is only released after the lookup.
        this.activeFeatures = getActiveFid(map);
        this.words = null;
    } else {
        activeFeatures = null;
    }
}
/**
 * Folds one weighted training example into the running counts.
 *
 * Adds {@code weight} to {@code sampleSize} and to {@code classCounts[classID]},
 * adds {@code weight * activeFeatures.length} to {@code fidCount}, and then, for
 * every entry of {@code activeFeatures} (repeats included), adds {@code weight} to
 * {@code weights[classID]} and {@code wordCounts[feature]} and calls
 * {@code updateFidCounts(feature, classID, weight)}.
 *
 * @param activeFeatures feature ids active in the example; must not be null
 * @param weight         amount added to each affected counter
 * @param classID        index of the example's class in the per-class arrays
 */
public void weightedOnlineLearning(int[] activeFeatures, double weight, int classID) {
    sampleSize += weight;
    classCounts[classID] += weight;
    fidCount += weight * activeFeatures.length;

    for (int pos = 0; pos < activeFeatures.length; pos++) {
        final int fid = activeFeatures[pos];
        // Accumulated once per feature occurrence, not once per example.
        weights[classID] += weight;
        wordCounts[fid] += weight;
        updateFidCounts(fid, classID, weight);
    }
}
/**
 * Renders the positive prediction confidences for a document as a feature string.
 *
 * Calls {@code getPredictionConfidence(d)} and, for each class index {@code i} in
 * {@code [0, classesN)} whose confidence is strictly greater than zero, appends
 * {@code methodName + i + "(" + confidence + ") "} to the result. Classes with a
 * confidence of zero or below are omitted; the result is the empty string when no
 * class qualifies.
 *
 * @param d the document to score
 * @return the space-terminated entries concatenated in class-index order
 */
public String getExtendedFeatures(Document d) {
    double[] conf = getPredictionConfidence(d);
    // StringBuilder avoids re-copying the accumulated text on every append.
    StringBuilder res = new StringBuilder();
    for (int i = 0; i < classesN; i++) {
        if (conf[i] > 0) {
            res.append(methodName + i + "(" + conf[i] + ") ");
        }
    }
    return res.toString();
}
} // closes the enclosing type, whose header lies outside this excerpt