/**
 * Restores a previously saved model from disk.
 *
 * <p>The feature map is loaded from {@code file + ".nb.featuremap"}. The model itself is read
 * from {@code file}, one value per line, in this order: {@code sampleSize}, {@code classesN},
 * {@code classesN} weights, {@code classesN} class counts, {@code map.dim} word counts,
 * {@code fidCount}, the number of hashtables in {@code fidCounts}, and then for every hashtable
 * its entry count followed by that many (feature id, value) line pairs.
 *
 * <p>The input file is closed even when a line is missing or cannot be parsed.
 *
 * @param file path of the saved model; the part after the last {@code '/'} becomes
 *     {@code methodName}
 * @throws NumberFormatException if a line cannot be parsed as the expected number
 */
public MemoryEfficientNB(String file) {
    methodName = file.substring(file.lastIndexOf('/') + 1);
    map = new FeatureMap(file + ".nb.featuremap");

    InFile in = new InFile(file);
    try {
        sampleSize = Double.parseDouble(in.readLine());
        classesN = Integer.parseInt(in.readLine());

        weights = new double[classesN];
        for (int i = 0; i < weights.length; i++) {
            weights[i] = Double.parseDouble(in.readLine());
        }

        classCounts = new double[classesN];
        for (int i = 0; i < classCounts.length; i++) {
            classCounts[i] = Double.parseDouble(in.readLine());
        }

        // One word count per feature-map dimension.
        wordCounts = new double[map.dim];
        for (int i = 0; i < map.dim; i++) {
            wordCounts[i] = Double.parseDouble(in.readLine());
        }

        fidCount = Double.parseDouble(in.readLine());

        int hashesN = Integer.parseInt(in.readLine());
        fidCounts = new Vector<>();
        for (int i = 0; i < hashesN; i++) {
            Hashtable<Integer, Double> h = new Hashtable<>();
            int hSize = Integer.parseInt(in.readLine());
            for (int k = 0; k < hSize; k++) {
                // Each entry is stored as two consecutive lines: feature id, then its value.
                int fid = Integer.parseInt(in.readLine());
                double val = Double.parseDouble(in.readLine());
                h.put(fid, val);
            }
            fidCounts.addElement(h);
        }
    } finally {
        // Release the file handle on both the success and the failure path.
        in.close();
    }
}
// Prepare one train/test round for index i: fetch both collections via getTrain(i)/getTest(i),
// populate a fresh FeatureMap from the training collection only (addDocs is given
// minWordsAppearenceCount and false), then construct a MemoryEfficientNB from train, map and
// classesNum.
// NOTE(review): i is presumably the cross-validation fold index and minWordsAppearenceCount a
// minimum word-frequency cutoff -- confirm against the enclosing method and FeatureMap.addDocs.
// `test` is not used within this fragment; presumably it is evaluated further down -- verify.
DocumentCollection train = getTrain(i); DocumentCollection test = getTest(i); FeatureMap map = new FeatureMap(); map.addDocs(train, minWordsAppearenceCount, false); MemoryEfficientNB nb = new MemoryEfficientNB(train, map, classesNum);
// Prepare one train/test round for index i: fetch both collections via getTrain(i)/getTest(i),
// populate a fresh FeatureMap from the training collection only (addDocs is given
// minWordsAppearenceCount and false), then construct a MemoryEfficientNB from train, map and
// classesNum.
// NOTE(review): i is presumably the cross-validation fold index and minWordsAppearenceCount a
// minimum word-frequency cutoff -- confirm against the enclosing method and FeatureMap.addDocs.
// `test` is not used within this fragment; presumably it is evaluated further down -- verify.
DocumentCollection train = getTrain(i); DocumentCollection test = getTest(i); FeatureMap map = new FeatureMap(); map.addDocs(train, minWordsAppearenceCount, false); MemoryEfficientNB nb = new MemoryEfficientNB(train, map, classesNum);
/**
 * Restores a previously saved model from disk.
 *
 * <p>The feature map is loaded from {@code file + ".nb.featuremap"}. The model itself is read
 * from {@code file}, one value per line, in this order: {@code sampleSize}, {@code classesN},
 * {@code classesN} weights, {@code classesN} class counts, {@code map.dim} word counts,
 * {@code fidCount}, the number of hashtables in {@code fidCounts}, and then for every hashtable
 * its entry count followed by that many (feature id, value) line pairs.
 *
 * <p>The input file is closed even when a line is missing or cannot be parsed.
 *
 * @param file path of the saved model; the part after the last {@code '/'} becomes
 *     {@code methodName}
 * @throws NumberFormatException if a line cannot be parsed as the expected number
 */
public MemoryEfficientNB(String file) {
    methodName = file.substring(file.lastIndexOf('/') + 1);
    map = new FeatureMap(file + ".nb.featuremap");

    InFile in = new InFile(file);
    try {
        sampleSize = Double.parseDouble(in.readLine());
        classesN = Integer.parseInt(in.readLine());

        weights = new double[classesN];
        for (int i = 0; i < weights.length; i++) {
            weights[i] = Double.parseDouble(in.readLine());
        }

        classCounts = new double[classesN];
        for (int i = 0; i < classCounts.length; i++) {
            classCounts[i] = Double.parseDouble(in.readLine());
        }

        // One word count per feature-map dimension.
        wordCounts = new double[map.dim];
        for (int i = 0; i < map.dim; i++) {
            wordCounts[i] = Double.parseDouble(in.readLine());
        }

        fidCount = Double.parseDouble(in.readLine());

        int hashesN = Integer.parseInt(in.readLine());
        fidCounts = new Vector<>();
        for (int i = 0; i < hashesN; i++) {
            Hashtable<Integer, Double> h = new Hashtable<>();
            int hSize = Integer.parseInt(in.readLine());
            for (int k = 0; k < hSize; k++) {
                // Each entry is stored as two consecutive lines: feature id, then its value.
                int fid = Integer.parseInt(in.readLine());
                double val = Double.parseDouble(in.readLine());
                h.put(fid, val);
            }
            fidCounts.addElement(h);
        }
    } finally {
        // Release the file handle on both the success and the failure path.
        in.close();
    }
}
// Prepare one train/test round for index i: fetch both collections via getTrain(i)/getTest(i),
// populate a fresh FeatureMap from the training collection only (addDocs is given
// minWordsAppearenceCount and false), then construct a MemoryEfficientNB from train, map and
// classesNum.
// NOTE(review): i is presumably the cross-validation fold index and minWordsAppearenceCount a
// minimum word-frequency cutoff -- confirm against the enclosing method and FeatureMap.addDocs.
// `test` is not used within this fragment; presumably it is evaluated further down -- verify.
DocumentCollection train = getTrain(i); DocumentCollection test = getTest(i); FeatureMap map = new FeatureMap(); map.addDocs(train, minWordsAppearenceCount, false); MemoryEfficientNB nb = new MemoryEfficientNB(train, map, classesNum);
/**
 * Restores a previously saved model from disk.
 *
 * <p>The feature map is loaded from {@code file + ".nb.featuremap"}. The model itself is read
 * from {@code file}, one value per line, in this order: {@code sampleSize}, {@code classesN},
 * {@code classesN} weights, {@code classesN} class counts, {@code map.dim} word counts,
 * {@code fidCount}, the number of hashtables in {@code fidCounts}, and then for every hashtable
 * its entry count followed by that many (feature id, value) line pairs.
 *
 * <p>The input file is closed even when a line is missing or cannot be parsed.
 *
 * @param file path of the saved model; the part after the last {@code '/'} becomes
 *     {@code methodName}
 * @throws NumberFormatException if a line cannot be parsed as the expected number
 */
public MemoryEfficientNB(String file) {
    methodName = file.substring(file.lastIndexOf('/') + 1);
    map = new FeatureMap(file + ".nb.featuremap");

    InFile in = new InFile(file);
    try {
        sampleSize = Double.parseDouble(in.readLine());
        classesN = Integer.parseInt(in.readLine());

        weights = new double[classesN];
        for (int i = 0; i < weights.length; i++) {
            weights[i] = Double.parseDouble(in.readLine());
        }

        classCounts = new double[classesN];
        for (int i = 0; i < classCounts.length; i++) {
            classCounts[i] = Double.parseDouble(in.readLine());
        }

        // One word count per feature-map dimension.
        wordCounts = new double[map.dim];
        for (int i = 0; i < map.dim; i++) {
            wordCounts[i] = Double.parseDouble(in.readLine());
        }

        fidCount = Double.parseDouble(in.readLine());

        int hashesN = Integer.parseInt(in.readLine());
        fidCounts = new Vector<>();
        for (int i = 0; i < hashesN; i++) {
            Hashtable<Integer, Double> h = new Hashtable<>();
            int hSize = Integer.parseInt(in.readLine());
            for (int k = 0; k < hSize; k++) {
                // Each entry is stored as two consecutive lines: feature id, then its value.
                int fid = Integer.parseInt(in.readLine());
                double val = Double.parseDouble(in.readLine());
                h.put(fid, val);
            }
            fidCounts.addElement(h);
        }
    } finally {
        // Release the file handle on both the success and the failure path.
        in.close();
    }
}
public static void initTopicClassifier(String pathToTopicData, String[] fileNames, String[] _labelnames) { map = new FeatureMap(); labelnames = new String[_labelnames.length + 1]; labelnames[0] = "UNKNOWN"; for (int i = 0; i < _labelnames.length; i++) labelnames[1 + i] = _labelnames[i]; DocumentCollection docs = new DocumentCollection(); StopWords stops = new StopWords(pathToStopWords); for (int i = 0; i < fileNames.length; i++) docs.addDocuments(pathToTopicData + "/" + fileNames[i], i, stops, false, "\n\t -.,?<>;':\"[]{}\\|`~!@#$%^&*()_+=-0987654321`~"); map.addDocs(docs, 20, false); NfoldCrossvalidation cv = new NfoldCrossvalidation(docs, 5); cv.printNfoldCorrssvalidationNbAcc(fileNames.length, -1, 20); // System.exit(0); nb = new MemoryEfficientNB(docs, map, fileNames.length); }
public static void initTopicClassifier(String pathToTopicData, String[] fileNames, String[] _labelnames) { map = new FeatureMap(); labelnames = new String[_labelnames.length + 1]; labelnames[0] = "UNKNOWN"; for (int i = 0; i < _labelnames.length; i++) labelnames[1 + i] = _labelnames[i]; DocumentCollection docs = new DocumentCollection(); StopWords stops = new StopWords(pathToStopWords); for (int i = 0; i < fileNames.length; i++) docs.addDocuments(pathToTopicData + "/" + fileNames[i], i, stops, false, "\n\t -.,?<>;':\"[]{}\\|`~!@#$%^&*()_+=-0987654321`~"); map.addDocs(docs, 20, false); NfoldCrossvalidation cv = new NfoldCrossvalidation(docs, 5); cv.printNfoldCorrssvalidationNbAcc(fileNames.length, -1, 20); // System.exit(0); nb = new MemoryEfficientNB(docs, map, fileNames.length); }
public static void initTopicClassifier(String pathToTopicData, String[] fileNames, String[] _labelnames) { map = new FeatureMap(); labelnames = new String[_labelnames.length + 1]; labelnames[0] = "UNKNOWN"; for (int i = 0; i < _labelnames.length; i++) labelnames[1 + i] = _labelnames[i]; DocumentCollection docs = new DocumentCollection(); StopWords stops = new StopWords(pathToStopWords); for (int i = 0; i < fileNames.length; i++) docs.addDocuments(pathToTopicData + "/" + fileNames[i], i, stops, false, "\n\t -.,?<>;':\"[]{}\\|`~!@#$%^&*()_+=-0987654321`~"); map.addDocs(docs, 20, false); NfoldCrossvalidation cv = new NfoldCrossvalidation(docs, 5); cv.printNfoldCorrssvalidationNbAcc(fileNames.length, -1, 20); // System.exit(0); nb = new MemoryEfficientNB(docs, map, fileNames.length); }