/**
 * Constructs a term-document matrix builder.
 *
 * <p>Loads the keyword and stopword term sets from the given files, derives the
 * output files from {@code matrixName} by appending the suffixes
 * {@code -matrix}, {@code -row}, {@code -col} and {@code -df}, and opens the
 * sparse binary matrix writer on the {@code -matrix} file.
 *
 * @param matrixName   path prefix shared by all output files
 * @param stopwordFile file the stopword set is read from
 * @param keywordFile  file the keyword set is read from
 * @throws IOException if either term file cannot be opened or read, or if the
 *                     matrix writer reports an I/O failure while being created
 */
public TermDocumentMatrixBuilder(String matrixName, File stopwordFile, File keywordFile) throws IOException {
    totalKW = 0;

    // Close each reader as soon as its term set has been loaded; the original
    // code handed an anonymous FileReader to read() and leaked the file handle.
    keywordSet = new TermSet();
    try (FileReader keywordReader = new FileReader(keywordFile)) {
        keywordSet.read(keywordReader);
    }
    logger.info("keyword to be indexed: " + keywordSet.size());

    stopwordSet = new TermSet();
    try (FileReader stopwordReader = new FileReader(stopwordFile)) {
        stopwordSet.read(stopwordReader);
    }
    logger.info(stopwordFile + "(" + stopwordSet.size() + ")");

    // NOTE(review): presumably a frequency table indexed by length 0..100 -
    // confirm against the code that fills it.
    lengthFreq = new int[101];
    columnCount = 0;

    // All output artifacts share the matrixName prefix.
    matrixFile = new File(matrixName + "-matrix");
    rowFile = new File(matrixName + "-row");
    colFile = new File(matrixName + "-col");
    dfFile = new File(matrixName + "-df");

    termIndex = new Index();
    documentIndex = new Index();
    matrixWriter = new SparseBinaryMatrixFileWriter(matrixFile);
    corpusVocabulary = new Vocabulary();
} // end constructor
corpusVocabulary = new Vocabulary();
Vocabulary voc = new Vocabulary();
Vocabulary voc = new Vocabulary();
Vocabulary voc = new Vocabulary();
protected void addDocument(String[] array) throws IOException { Vocabulary documentVocabulary = new Vocabulary();
private void addDocument(String[] array) throws IOException { Vocabulary documentVocabulary = new Vocabulary();