/**
 * Reads lines from the underlying input until a non-empty token list is found and
 * wraps it in a new Document labeled with the given class ID.
 *
 * Per line: optionally drops the first token (when discardFirstToken is set) and,
 * when a stop-word filter is configured, removes stop words before the emptiness check.
 *
 * @param initClassID class label to assign to the returned document
 * @return the next non-empty Document, or null once the input is exhausted
 */
public Document nextDoc(int initClassID) {
    Vector<String> tokens = in.readLineTokens(tokenizationDelimiters);
    if (discardFirstToken && tokens != null && !tokens.isEmpty()) {
        tokens.removeElementAt(0);
    }
    if (stops != null) {
        tokens = stops.filterStopWords(tokens);
    }
    while (tokens != null) {
        if (!tokens.isEmpty()) {
            return new Document(tokens, initClassID);
        }
        // Line was empty (or fully filtered) — advance to the next one.
        tokens = in.readLineTokens(tokenizationDelimiters);
        if (discardFirstToken && tokens != null && !tokens.isEmpty()) {
            tokens.removeElementAt(0);
        }
        if (stops != null) {
            tokens = stops.filterStopWords(tokens);
        }
    }
    return null;
}
}
/**
 * Reads lines from the underlying input until a non-empty token list is found and
 * wraps it in a new Document labeled with the given class ID.
 *
 * Per line: optionally drops the first token (when discardFirstToken is set) and,
 * when a stop-word filter is configured, removes stop words before the emptiness check.
 *
 * @param initClassID class label to assign to the returned document
 * @return the next non-empty Document, or null once the input is exhausted
 */
public Document nextDoc(int initClassID) {
    Vector<String> tokens = in.readLineTokens(tokenizationDelimiters);
    if (discardFirstToken && tokens != null && !tokens.isEmpty()) {
        tokens.removeElementAt(0);
    }
    if (stops != null) {
        tokens = stops.filterStopWords(tokens);
    }
    while (tokens != null) {
        if (!tokens.isEmpty()) {
            return new Document(tokens, initClassID);
        }
        // Line was empty (or fully filtered) — advance to the next one.
        tokens = in.readLineTokens(tokenizationDelimiters);
        if (discardFirstToken && tokens != null && !tokens.isEmpty()) {
            tokens.removeElementAt(0);
        }
        if (stops != null) {
            tokens = stops.filterStopWords(tokens);
        }
    }
    return null;
}
}
/**
 * Reads lines from the underlying input until a non-empty token list is found and
 * wraps it in a new Document labeled with the given class ID.
 *
 * Per line: optionally drops the first token (when discardFirstToken is set) and,
 * when a stop-word filter is configured, removes stop words before the emptiness check.
 *
 * @param initClassID class label to assign to the returned document
 * @return the next non-empty Document, or null once the input is exhausted
 */
public Document nextDoc(int initClassID) {
    Vector<String> tokens = in.readLineTokens(tokenizationDelimiters);
    if (discardFirstToken && tokens != null && !tokens.isEmpty()) {
        tokens.removeElementAt(0);
    }
    if (stops != null) {
        tokens = stops.filterStopWords(tokens);
    }
    while (tokens != null) {
        if (!tokens.isEmpty()) {
            return new Document(tokens, initClassID);
        }
        // Line was empty (or fully filtered) — advance to the next one.
        tokens = in.readLineTokens(tokenizationDelimiters);
        if (discardFirstToken && tokens != null && !tokens.isEmpty()) {
            tokens.removeElementAt(0);
        }
        if (stops != null) {
            tokens = stops.filterStopWords(tokens);
        }
    }
    return null;
}
}
/**
 * Reads the named file line by line, treating each non-empty line as one Document
 * with the given class label, and appends it to {@code docs}.
 *
 * Per line: optionally drops the first token (when {@code discardFirstToken} is set)
 * and, when {@code stops} is non-null, removes stop words before the emptiness check.
 *
 * @param filename                path of the file to read
 * @param classID                 class label assigned to every document created
 * @param stops                   stop-word filter, or null to skip filtering
 * @param discardFirstToken       if true, the first token of each line is dropped
 * @param tokenizationDelimiters  delimiter characters used to split each line
 */
public void addDocuments(String filename, int classID, StopWords stops, boolean discardFirstToken, String tokenizationDelimiters) {
    InFile in = new InFile(filename);
    Vector<String> words = in.readLineTokens(tokenizationDelimiters);
    if ((discardFirstToken) && (words != null) && (words.size() > 0)) words.removeElementAt(0);
    if (stops != null) words = stops.filterStopWords(words);
    while (words != null) {
        // BUG FIX: was "words.size() >= 0", which is always true and added an empty
        // Document for every blank or fully-filtered line. Use "> 0" to skip them,
        // matching nextDoc's behavior.
        if (words.size() > 0) docs.addElement(new Document(words, classID));
        words = in.readLineTokens(tokenizationDelimiters);
        if ((discardFirstToken) && (words != null) && (words.size() > 0)) words.removeElementAt(0);
        if (stops != null) words = stops.filterStopWords(words);
    }
}
/**
 * Reads the named file line by line, treating each non-empty line as one Document
 * with the given class label, and appends it to {@code docs}.
 *
 * Per line: optionally drops the first token (when {@code discardFirstToken} is set)
 * and, when {@code stops} is non-null, removes stop words before the emptiness check.
 *
 * @param filename                path of the file to read
 * @param classID                 class label assigned to every document created
 * @param stops                   stop-word filter, or null to skip filtering
 * @param discardFirstToken       if true, the first token of each line is dropped
 * @param tokenizationDelimiters  delimiter characters used to split each line
 */
public void addDocuments(String filename, int classID, StopWords stops, boolean discardFirstToken, String tokenizationDelimiters) {
    InFile in = new InFile(filename);
    Vector<String> words = in.readLineTokens(tokenizationDelimiters);
    if ((discardFirstToken) && (words != null) && (words.size() > 0)) words.removeElementAt(0);
    if (stops != null) words = stops.filterStopWords(words);
    while (words != null) {
        // BUG FIX: was "words.size() >= 0", which is always true and added an empty
        // Document for every blank or fully-filtered line. Use "> 0" to skip them,
        // matching nextDoc's behavior.
        if (words.size() > 0) docs.addElement(new Document(words, classID));
        words = in.readLineTokens(tokenizationDelimiters);
        if ((discardFirstToken) && (words != null) && (words.size() > 0)) words.removeElementAt(0);
        if (stops != null) words = stops.filterStopWords(words);
    }
}
/**
 * Reads the named file line by line, treating each non-empty line as one Document
 * with the given class label, and appends it to {@code docs}.
 *
 * Per line: optionally drops the first token (when {@code discardFirstToken} is set)
 * and, when {@code stops} is non-null, removes stop words before the emptiness check.
 *
 * @param filename                path of the file to read
 * @param classID                 class label assigned to every document created
 * @param stops                   stop-word filter, or null to skip filtering
 * @param discardFirstToken       if true, the first token of each line is dropped
 * @param tokenizationDelimiters  delimiter characters used to split each line
 */
public void addDocuments(String filename, int classID, StopWords stops, boolean discardFirstToken, String tokenizationDelimiters) {
    InFile in = new InFile(filename);
    Vector<String> words = in.readLineTokens(tokenizationDelimiters);
    if ((discardFirstToken) && (words != null) && (words.size() > 0)) words.removeElementAt(0);
    if (stops != null) words = stops.filterStopWords(words);
    while (words != null) {
        // BUG FIX: was "words.size() >= 0", which is always true and added an empty
        // Document for every blank or fully-filtered line. Use "> 0" to skip them,
        // matching nextDoc's behavior.
        if (words.size() > 0) docs.addElement(new Document(words, classID));
        words = in.readLineTokens(tokenizationDelimiters);
        if ((discardFirstToken) && (words != null) && (words.size() > 0)) words.removeElementAt(0);
        if (stops != null) words = stops.filterStopWords(words);
    }
}
/**
 * Reads every file in the given directory, concatenating all tokens of each file
 * into one Document with the given class label, and appends each to {@code docs}.
 *
 * Per line: optionally drops the first token (when {@code discardFirstToken} is set)
 * and, when {@code stops} is non-null, removes stop words.
 *
 * @param path                    directory whose files are read
 * @param classID                 class label assigned to every document created
 * @param stops                   stop-word filter, or null to skip filtering
 * @param discardFirstToken       if true, the first token of each line is dropped
 * @param tokenizationDelimiters  delimiter characters used to split each line
 * @throws IllegalArgumentException if {@code path} is not a listable directory
 */
public void addFolder(String path, int classID, StopWords stops, boolean discardFirstToken, String tokenizationDelimiters) {
    String[] files = (new File(path)).list();
    // BUG FIX: File.list() returns null when the path does not exist or is not a
    // readable directory; the original code then threw an uninformative NPE.
    if (files == null) throw new IllegalArgumentException("Cannot list directory: " + path);
    for (String file : files) {
        InFile in = new InFile(path + "/" + file);
        Vector<String> allWords = new Vector<>();
        Vector<String> words = in.readLineTokens(tokenizationDelimiters);
        if ((discardFirstToken) && (words != null) && (words.size() > 0)) words.removeElementAt(0);
        if (stops != null) words = stops.filterStopWords(words);
        while (words != null) {
            // Idiom: addAll replaces the original manual element-copy loop.
            allWords.addAll(words);
            words = in.readLineTokens(tokenizationDelimiters);
            if ((discardFirstToken) && (words != null) && (words.size() > 0)) words.removeElementAt(0);
            if (stops != null) words = stops.filterStopWords(words);
        }
        docs.addElement(new Document(allWords, classID));
    }
}
/**
 * Reads every file in the given directory, concatenating all tokens of each file
 * into one Document with the given class label, and appends each to {@code docs}.
 *
 * Per line: optionally drops the first token (when {@code discardFirstToken} is set)
 * and, when {@code stops} is non-null, removes stop words.
 *
 * @param path                    directory whose files are read
 * @param classID                 class label assigned to every document created
 * @param stops                   stop-word filter, or null to skip filtering
 * @param discardFirstToken       if true, the first token of each line is dropped
 * @param tokenizationDelimiters  delimiter characters used to split each line
 * @throws IllegalArgumentException if {@code path} is not a listable directory
 */
public void addFolder(String path, int classID, StopWords stops, boolean discardFirstToken, String tokenizationDelimiters) {
    String[] files = (new File(path)).list();
    // BUG FIX: File.list() returns null when the path does not exist or is not a
    // readable directory; the original code then threw an uninformative NPE.
    if (files == null) throw new IllegalArgumentException("Cannot list directory: " + path);
    for (String file : files) {
        InFile in = new InFile(path + "/" + file);
        Vector<String> allWords = new Vector<>();
        Vector<String> words = in.readLineTokens(tokenizationDelimiters);
        if ((discardFirstToken) && (words != null) && (words.size() > 0)) words.removeElementAt(0);
        if (stops != null) words = stops.filterStopWords(words);
        while (words != null) {
            // Idiom: addAll replaces the original manual element-copy loop.
            allWords.addAll(words);
            words = in.readLineTokens(tokenizationDelimiters);
            if ((discardFirstToken) && (words != null) && (words.size() > 0)) words.removeElementAt(0);
            if (stops != null) words = stops.filterStopWords(words);
        }
        docs.addElement(new Document(allWords, classID));
    }
}
/**
 * Reads every file in the given directory, concatenating all tokens of each file
 * into one Document with the given class label, and appends each to {@code docs}.
 *
 * Per line: optionally drops the first token (when {@code discardFirstToken} is set)
 * and, when {@code stops} is non-null, removes stop words.
 *
 * @param path                    directory whose files are read
 * @param classID                 class label assigned to every document created
 * @param stops                   stop-word filter, or null to skip filtering
 * @param discardFirstToken       if true, the first token of each line is dropped
 * @param tokenizationDelimiters  delimiter characters used to split each line
 * @throws IllegalArgumentException if {@code path} is not a listable directory
 */
public void addFolder(String path, int classID, StopWords stops, boolean discardFirstToken, String tokenizationDelimiters) {
    String[] files = (new File(path)).list();
    // BUG FIX: File.list() returns null when the path does not exist or is not a
    // readable directory; the original code then threw an uninformative NPE.
    if (files == null) throw new IllegalArgumentException("Cannot list directory: " + path);
    for (String file : files) {
        InFile in = new InFile(path + "/" + file);
        Vector<String> allWords = new Vector<>();
        Vector<String> words = in.readLineTokens(tokenizationDelimiters);
        if ((discardFirstToken) && (words != null) && (words.size() > 0)) words.removeElementAt(0);
        if (stops != null) words = stops.filterStopWords(words);
        while (words != null) {
            // Idiom: addAll replaces the original manual element-copy loop.
            allWords.addAll(words);
            words = in.readLineTokens(tokenizationDelimiters);
            if ((discardFirstToken) && (words != null) && (words.size() > 0)) words.removeElementAt(0);
            if (stops != null) words = stops.filterStopWords(words);
        }
        docs.addElement(new Document(allWords, classID));
    }
}
/**
 * Builds a Document from the named file: every token of every line is appended,
 * in order, to this document's word list, and the given class label is stored.
 * When a stop-word filter is supplied, each line is filtered before its tokens
 * are appended. Note: unlike the folder/file loaders, no first-token discard
 * option exists here.
 *
 * @param filename                path of the file to read
 * @param _classID                class label stored on this document
 * @param stops                   stop-word filter, or null to skip filtering
 * @param tokenizationDelimiters  delimiter characters used to split each line
 */
public Document(String filename, int _classID, StopWords stops, String tokenizationDelimiters) {
    InFile in = new InFile(filename);
    this.classID = _classID;
    words = new Vector<>();
    for (Vector<String> line = in.readLineTokens(tokenizationDelimiters);
         line != null;
         line = in.readLineTokens(tokenizationDelimiters)) {
        Vector<String> kept = (stops != null) ? stops.filterStopWords(line) : line;
        words.addAll(kept);
    }
    // Release the unused slack in the backing array.
    words.trimToSize();
}
/**
 * Builds a Document from the named file: every token of every line is appended,
 * in order, to this document's word list, and the given class label is stored.
 * When a stop-word filter is supplied, each line is filtered before its tokens
 * are appended. Note: unlike the folder/file loaders, no first-token discard
 * option exists here.
 *
 * @param filename                path of the file to read
 * @param _classID                class label stored on this document
 * @param stops                   stop-word filter, or null to skip filtering
 * @param tokenizationDelimiters  delimiter characters used to split each line
 */
public Document(String filename, int _classID, StopWords stops, String tokenizationDelimiters) {
    InFile in = new InFile(filename);
    this.classID = _classID;
    words = new Vector<>();
    for (Vector<String> line = in.readLineTokens(tokenizationDelimiters);
         line != null;
         line = in.readLineTokens(tokenizationDelimiters)) {
        Vector<String> kept = (stops != null) ? stops.filterStopWords(line) : line;
        words.addAll(kept);
    }
    // Release the unused slack in the backing array.
    words.trimToSize();
}
/**
 * Builds a Document from the named file: every token of every line is appended,
 * in order, to this document's word list, and the given class label is stored.
 * When a stop-word filter is supplied, each line is filtered before its tokens
 * are appended. Note: unlike the folder/file loaders, no first-token discard
 * option exists here.
 *
 * @param filename                path of the file to read
 * @param _classID                class label stored on this document
 * @param stops                   stop-word filter, or null to skip filtering
 * @param tokenizationDelimiters  delimiter characters used to split each line
 */
public Document(String filename, int _classID, StopWords stops, String tokenizationDelimiters) {
    InFile in = new InFile(filename);
    this.classID = _classID;
    words = new Vector<>();
    for (Vector<String> line = in.readLineTokens(tokenizationDelimiters);
         line != null;
         line = in.readLineTokens(tokenizationDelimiters)) {
        Vector<String> kept = (stops != null) ? stops.filterStopWords(line) : line;
        words.addAll(kept);
    }
    // Release the unused slack in the backing array.
    words.trimToSize();
}